diff --git a/.github/workflows/sync-to-hf.yml b/.github/workflows/sync-to-hf.yml new file mode 100644 index 0000000000000000000000000000000000000000..d886dc8ac5597eb11814533125ee95a93af7b3eb --- /dev/null +++ b/.github/workflows/sync-to-hf.yml @@ -0,0 +1,55 @@ +name: Sync to HuggingFace Dataset + +on: + push: + branches: [main] + paths: + - 'data/**/*.json' + workflow_dispatch: # Allow manual trigger + +jobs: + sync-to-huggingface: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 2 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + pip install datasets huggingface_hub pandas pyarrow + + - name: Convert Changed JSONs to Parquet (Optimized) + env: + HF_DATASET_REPO: deepmage121/eee_test + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: | + echo "Detecting changed leaderboards..." + python scripts/convert_to_parquet.py + + - name: Upload Changed Parquets to HuggingFace + env: + HF_DATASET_REPO: deepmage121/eee_test + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: | + echo "Uploading changed parquets..." + python scripts/upload_to_hf.py + + - name: Report status + if: success() + run: | + echo "Successfully synced to HuggingFace dataset" + echo "View at: https://huggingface.co/datasets/deepmage121/eee_test" + if [ -f parquet_output/changed_leaderboards.json ]; then + echo "" + echo "Changes processed:" + cat parquet_output/changed_leaderboards.json + fi + diff --git a/.gitignore b/.gitignore index e43b0f988953ae3a84b00331d0ccf5f7d51cb3cf..f66c0318c86fa7c5971c863094b4f0a45f2c4d01 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,9 @@ .DS_Store +.secrets +.actrc +__pycache__/ +*.pyc +parquet_output/ +*.venv* +*.md +*.ipynb_checkpoints diff --git a/.python-version b/.python-version new file mode 100644 index 0000000000000000000000000000000000000000..2c0733315e415bfb5e5b353f9996ecd964d395b2 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11 diff --git a/app.py b/app.py index 8c64d41b1c2e8f7e742563979659bc7e7bc69215..4132024d38460461651d2af4f7f8bf3035e413bf 100644 --- a/app.py +++ b/app.py @@ -1,479 +1,106 @@ +""" +Evaluation Leaderboard - Gradio Interface +Displays model evaluation results from HuggingFace datasets. 
+""" import gradio as gr import pandas as pd -import json from pathlib import Path -DATA_DIR = Path("leaderboard_data") -LEADERBOARD_CACHE = {} - -def parse_eval_json(file_path): - """Parses a single JSON file to extract model, provider, and results.""" - try: - with open(file_path, 'r') as f: - data = json.load(f) - - leaderboard_name = data.get("evaluation_source", {}).get("evaluation_source_name", "Unknown Leaderboard") - provider_name = data.get("source_metadata", {}).get("source_organization_name", "Unknown Provider") - model_id = data.get("model_info", {}).get("id", "Unknown Model") - developer_name = data.get("model_info", {}).get("developer", "Unknown Developer") - - params = data.get("model_info", {}).get("params_billions", None) - architecture = data.get("model_info", {}).get("architecture", "Unknown") - precision = data.get("additional_details", {}).get("precision", "Unknown") - if precision == "Unknown": - precision = data.get("model_info", {}).get("precision", "Unknown") - - results = {} - if "evaluation_results" in data: - for res in data["evaluation_results"]: - eval_name = res.get("evaluation_name", "Unknown Metric") - score = res.get("score_details", {}).get("score", None) - if score is not None: - results[eval_name] = score - - return { - "leaderboard": leaderboard_name, - "provider": provider_name, - "model": model_id, - "developer": developer_name, - "params": params, - "architecture": architecture, - "precision": precision, - "results": results, - "raw_data": data - } - except Exception as e: - print(f"Error parsing {file_path}: {e}") - return None - -def get_available_leaderboards(): - """Scans data directory for leaderboard folders.""" - if not DATA_DIR.exists(): - return [] - return [d.name for d in DATA_DIR.iterdir() if d.is_dir()] - -def normalize_leaderboard_name(name): - """Normalizes leaderboard name to remove spaces.""" - return name.replace(" ", "") - -def sanitize_filename_component(name): - """Sanitizes a name to be safe for use in directory names.""" - return name.replace("/", "_").replace("\\", "_").replace(":", "_").strip() - -def walk_eval_files(leaderboard_name): - """Generator that walks through Leaderboard directory recursively.""" - lb_path = DATA_DIR / leaderboard_name - if not lb_path.exists(): - return - - yield from lb_path.rglob("*.json") +# Import custom modules +from data_loader import ( + load_hf_dataset_on_startup, + get_available_leaderboards, + get_eval_metadata, + build_leaderboard_table, + clear_cache, + DATA_DIR +) +from ui_components import get_theme, get_custom_css, format_leaderboard_header, format_metric_details -def get_eval_metadata(selected_leaderboard): - """Extracts evaluation metadata from the leaderboard data.""" - if not selected_leaderboard: - return {} - - eval_metadata = {"evals": {}, "source_info": {}} - - for json_file in walk_eval_files(selected_leaderboard): - parsed = parse_eval_json(json_file) - if parsed: - if not eval_metadata["source_info"]: - source_meta = parsed["raw_data"].get("source_metadata", {}) - source_data_list = parsed["raw_data"].get("source_data", []) - url = source_data_list[0] if isinstance(source_data_list, list) and source_data_list else "#" - - eval_metadata["source_info"] = { - "organization": source_meta.get("source_organization_name", "Unknown"), - "relationship": source_meta.get("evaluator_relationship", "Unknown"), - "url": url - } - - if "evaluation_results" in parsed["raw_data"]: - for res in parsed["raw_data"]["evaluation_results"]: - eval_name = res.get("evaluation_name", "Unknown 
Metric") - if eval_name not in eval_metadata["evals"]: - metric_config = res.get("metric_config", {}) - eval_metadata["evals"][eval_name] = { - "description": metric_config.get("evaluation_description", "No description available"), - "score_type": metric_config.get("score_type", "unknown"), - "lower_is_better": metric_config.get("lower_is_better", False), - "min_score": metric_config.get("min_score"), - "max_score": metric_config.get("max_score"), - "level_names": metric_config.get("level_names", []), - "level_metadata": metric_config.get("level_metadata", []), - "has_unknown_level": metric_config.get("has_unknown_level", False) - } - break - - return eval_metadata -def format_eval_info_html(selected_leaderboard): - """Formats evaluation metadata into a responsive HTML grid.""" +def export_leaderboard_to_json(selected_leaderboard): + """Export current leaderboard to JSON files in a zip using parquet_to_folder.""" if not selected_leaderboard: - return """ -
-        <div>
-            <h2>👋 Welcome to Eval Leaderboard</h2>
-            <p>Select a leaderboard above to visualize results and metadata.</p>
-        </div>
- """ - - metadata = get_eval_metadata(selected_leaderboard) - if not metadata or not metadata.get("evals"): - return f"""
<p>No metadata found for {selected_leaderboard}</p>"""
-
-    source_info = metadata.get("source_info", {})
-    evals = metadata.get("evals", {})
-    unique_evals_count = len(evals)
-
-    eval_badges = "".join([
-        f'<span>{name}</span>'
-        for name in sorted(evals.keys())
-    ])
-
-    source_url = source_info.get('url', '#')
-    source_link = f'<a href="{source_url}">🔗 {source_info.get("organization", "Unknown")}</a>'
-
-    html = f"""
-    <div>
-        <h2>📊 {selected_leaderboard}</h2>
-        <div>
-            <div>Source Organization</div>
-            <div>{source_link}</div>
-        </div>
-        <div>
-            <div>Evaluator Relationship</div>
-            <div>{source_info.get('relationship', 'Unknown').replace('_', ' ').title()}</div>
-        </div>
-        <div>
-            <div>Included Evaluations</div>
-            <div>{eval_badges}</div>
-        </div>
-    </div>
-    <h3>Metric Details</h3>
- """ + return None - html += """ -
- """ + import tempfile + import shutil + import zipfile + from json_to_parquet import parquet_to_folder - for eval_name, info in evals.items(): - score_type = info['score_type'].upper() if info['score_type'] else "UNKNOWN" - direction = "Lower is better" if info['lower_is_better'] else "Higher is better" - direction_icon = "↓" if info['lower_is_better'] else "↑" + try: + # Find the parquet file in DATA_DIR + parquet_path = DATA_DIR / selected_leaderboard / f"{selected_leaderboard}.parquet" - details_content = "" - if info['score_type'] == "continuous" and info.get('min_score') is not None: - details_content += f"
Range: [{info['min_score']} - {info['max_score']}]"
-        elif info['score_type'] == "levels" and info.get('level_names'):
-            levels = ", ".join(info['level_names'])
-            details_content += f"Levels: {levels}"
+        if not parquet_path.exists():
+            print(f"Parquet file not found: {parquet_path}")
+            return None
-
-        if info.get('has_unknown_level'):
-            details_content += "* -1 indicates Unknown"
-
-        html += f"""
-        <div>
-            <div>🏷️ {eval_name}</div>
-            <div>{direction_icon} {direction}</div>
+        # Create temp directory for export
+        with tempfile.TemporaryDirectory() as temp_dir:
+            temp_path = Path(temp_dir)
+            output_dir = temp_path / "json_export"
+            output_dir.mkdir()
-            <div>{info['description']}</div>
-            <div>
-                {details_content}
-                {score_type}
-            </div>
-        </div>
- """ - - html += "
" - return html + # Use the round-trip functionality from json_to_parquet + parquet_to_folder(str(parquet_path), str(output_dir)) + + # Create zip file + zip_path = temp_path / f"{selected_leaderboard}_export.zip" + with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: + for json_file in output_dir.rglob("*.json"): + arcname = json_file.relative_to(output_dir) + zipf.write(json_file, arcname) + + # Copy to a permanent location for download + final_zip = Path(tempfile.gettempdir()) / f"{selected_leaderboard}_export.zip" + shutil.copy(zip_path, final_zip) + + return str(final_zip) + except Exception as e: + print(f"Export error: {e}") + return None + -def update_leaderboard_table(selected_leaderboard, search_query="", group_by_model=False, progress=gr.Progress()): +def update_leaderboard_table(selected_leaderboard, search_query="", progress=gr.Progress()): """Loads and aggregates data for the selected leaderboard.""" if not selected_leaderboard: - return pd.DataFrame(), format_eval_info_html(None) + return pd.DataFrame(), "", format_leaderboard_header(None, {}), format_metric_details(None, {}) - # Check cache - full_df = None - if selected_leaderboard in LEADERBOARD_CACHE: - # Cache stores (df, meta_html) - full_df, meta_html = LEADERBOARD_CACHE[selected_leaderboard] - else: - progress(0, desc=f"Scanning {selected_leaderboard}...") - all_files = list(walk_eval_files(selected_leaderboard)) - total_files = len(all_files) - - rows = [] - for i, json_file in enumerate(all_files): - if i % 100 == 0: - progress((i / total_files), desc=f"Loading {selected_leaderboard}...") - parsed = parse_eval_json(json_file) - if parsed: - row = { - "Model": parsed["model"], - "Developer": parsed["developer"], - "Params (B)": parsed["params"], - "Arch": parsed["architecture"], - "Precision": parsed["precision"] - } - row.update(parsed["results"]) - rows.append(row) - - meta_html = format_eval_info_html(selected_leaderboard) - - if not rows: - full_df = pd.DataFrame(columns=["Model", "Developer", "Params (B)", "Arch", "Precision", "Score"]) - else: - full_df = pd.DataFrame(rows) - numeric_cols = full_df.select_dtypes(include=['float', 'int']).columns - full_df[numeric_cols] = full_df[numeric_cols].round(3) - - LEADERBOARD_CACHE[selected_leaderboard] = (full_df, meta_html) - - # Filter by search query - df = full_df.copy() - if search_query: - df = df[ - df["Model"].str.contains(search_query, case=False, na=False) | - df["Developer"].str.contains(search_query, case=False, na=False) - ] - - # Group by model and average scores if requested - if group_by_model and not df.empty: - # Identify grouping columns (non-numeric usually, or specific base cols) - # We group by the base identifiers. - base_cols_all = ["Model", "Developer", "Params (B)", "Arch", "Precision"] - group_cols = [c for c in base_cols_all if c in df.columns] - - # Identify columns to average (numeric) - numeric_cols = df.select_dtypes(include=['number']).columns - # Exclude group_cols from numeric_cols if they happen to be numeric (like Params) - # But groupby keys can be numeric. - # We want to average the SCORES. - # Any numeric column NOT in group_cols should be averaged. 
- agg_cols = [c for c in numeric_cols if c not in group_cols] - - if group_cols and agg_cols: - df = df.groupby(group_cols)[agg_cols].mean().reset_index() - df = df.round(3) - - # Drop columns where all values are null - df = df.dropna(axis=1, how='all') - - if df.empty: - return df, meta_html - - # Filter base_cols to only include columns that exist in df (in case some were dropped) - base_cols = [c for c in ["Model", "Developer", "Params (B)", "Arch", "Precision"] if c in df.columns] - eval_cols = [c for c in df.columns if c not in base_cols] - - cols = base_cols + eval_cols - return df[cols], meta_html - -def find_json_files(path): - """Recursively finds all JSON files in a directory or returns the file if it's a JSON file.""" - json_files = [] - path_obj = Path(path) - - if path_obj.is_file() and path_obj.suffix == ".json": - json_files.append(path_obj) - elif path_obj.is_dir(): - json_files.extend(path_obj.rglob("*.json")) - - return json_files - -def check_is_duplicate(save_dir, new_eval_id): - """Checks if a file with the same evaluation_id already exists in the directory.""" - if not new_eval_id or not save_dir.exists(): - return False - - for existing_file in save_dir.glob("*.json"): - try: - with open(existing_file, 'r') as f: - data = json.load(f) - if data.get("evaluation_id") == new_eval_id: - return True - except: - continue - return False - -def handle_file_upload(files, progress=gr.Progress()): - """Processes uploaded files/folders and saves them to the correct structure. + metadata = get_eval_metadata(selected_leaderboard) - Structure: Leaderboard/Provider/Model/.json - Preserves original filename (which already contains the UUID). - """ - if not files: - return gr.update(), "No files uploaded." + def progress_callback(value, desc): + progress(value, desc=desc) - saved_count = 0 - all_json_files = [] - skipped_count = 0 - duplicate_count = 0 + df = build_leaderboard_table(selected_leaderboard, "", progress_callback) + total_count = len(df) - progress(0, desc="Scanning files...") - for file_obj in files: - path = file_obj.name if hasattr(file_obj, "name") else file_obj - json_files = find_json_files(path) - - if Path(path).is_file() and Path(path).suffix != ".json": - skipped_count += 1 - - all_json_files.extend(json_files) + # Apply search filter (searches all columns) + if search_query and not df.empty: + mask = df.astype(str).apply(lambda row: row.str.contains(search_query, case=False, na=False).any(), axis=1) + df = df[mask] - total_files = len(all_json_files) - for i, json_file in enumerate(all_json_files): - progress((i / total_files), desc=f"Processing {json_file.name}...") - try: - parsed = parse_eval_json(json_file) - if not parsed: - continue - - leaderboard = normalize_leaderboard_name(parsed["leaderboard"]) - provider = parsed["provider"] - model_id = parsed["model"] - developer = parsed["developer"] - eval_id = parsed["raw_data"].get("evaluation_id") - - # Sanitize names for directory structure - sanitized_provider = sanitize_filename_component(developer) - sanitized_model = sanitize_filename_component(model_id) - - # Create structure: Leaderboard/Developer/Model - save_dir = DATA_DIR / leaderboard / sanitized_provider / sanitized_model - save_dir.mkdir(parents=True, exist_ok=True) - - # Check for duplicates based on evaluation_id - if check_is_duplicate(save_dir, eval_id): - duplicate_count += 1 - continue - - # Preserve original filename - filename = json_file.name - save_path = save_dir / filename - - # Avoid overwriting by appending counter - counter = 1 - 
while save_path.exists(): - stem = save_path.stem.rsplit('_', 1)[0] if '_' in save_path.stem else save_path.stem - save_path = save_dir / f"{stem}_{counter}.json" - counter += 1 - - with open(save_path, 'w') as f: - json.dump(parsed["raw_data"], f, indent=2) - - saved_count += 1 - - except Exception as e: - print(f"Failed to save {json_file}: {e}") - - # Clear cache since data changed - LEADERBOARD_CACHE.clear() - - # Refresh leaderboard choices - choices = get_available_leaderboards() + # Build search status message + if search_query: + search_msg = f"Showing {len(df)} of {total_count} results for '{search_query}'" + else: + search_msg = f"Showing {len(df)} results" - msg_parts = [f"Processed {saved_count} files."] - if duplicate_count > 0: - msg_parts.append(f"Skipped {duplicate_count} duplicates.") - if skipped_count > 0: - msg_parts.append(f"Skipped {skipped_count} non-JSON files.") - - return gr.Dropdown(choices=choices), " ".join(msg_parts), None, None + return df, search_msg, format_leaderboard_header(selected_leaderboard, metadata), format_metric_details(selected_leaderboard, metadata) -# Professional, high-contrast theme -theme = gr.themes.Soft( - primary_hue="slate", - neutral_hue="slate", - font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"] -).set( - body_background_fill="var(--neutral-50)", - block_background_fill="white", - block_border_width="1px", - block_title_text_weight="600" -) - -css = """ -/* Clean up the global container */ -.gradio-container { - max-width: 100% !important; - padding: 0 2rem !important; -} - -/* Table Styles */ -.dataframe { - border: 1px solid var(--border-color-primary) !important; - border-radius: 8px; -} -/* Hide file list in uploaders */ -.file-preview { - display: none !important; -} -""" +# Load HF dataset BEFORE building the interface +load_hf_dataset_on_startup() -with gr.Blocks(title="Eval Leaderboard", theme=theme, css=css) as demo: +# Build Gradio interface +with gr.Blocks(title="Eval Leaderboard", theme=get_theme(), css=get_custom_css()) as demo: with gr.Row(variant="compact", elem_classes="header-row"): with gr.Column(scale=1): gr.Markdown("# 🏆 Evaluation Leaderboard") gr.Markdown("Analyze and compare model performance metrics.", elem_classes="subtitle") - with gr.Row(variant="panel", equal_height=True): + with gr.Row(variant="panel"): initial_choices = get_available_leaderboards() initial_value = initial_choices[0] if initial_choices else None @@ -482,56 +109,51 @@ with gr.Blocks(title="Eval Leaderboard", theme=theme, css=css) as demo: choices=initial_choices, value=initial_value, label="Current Leaderboard", - interactive=True, - container=False, - scale=1 - ) - with gr.Column(scale=2): - search_box = gr.Textbox( - label="Search Model/Developer", - placeholder="🔍 Search model or developer...", - show_label=False, - container=False, - scale=1 - ) - with gr.Column(scale=1, min_width=100): - group_by_model = gr.Checkbox( - label="Average by Model", - value=False, - container=False + interactive=True ) - with gr.Column(scale=1, min_width=100): + with gr.Column(scale=3): + search_box = gr.Textbox( + label="Search", + placeholder="Type to search across all columns...", + show_label=False + ) + with gr.Column(scale=1): refresh_btn = gr.Button("🔄 Refresh", variant="secondary", size="sm") - with gr.Accordion("📤 Upload New Data", open=False): - upload_mode = gr.Radio( - choices=["Files", "Folder"], - value="Files", - label="Upload Mode", - info="Choose 'Files' for individual JSONs, or 'Folder' to upload a directory structure." 
- ) - - with gr.Group(visible=True) as file_upload_group: - file_uploader_files = gr.File( - file_count="multiple", - file_types=[".json"], - label="Select JSON Files" - ) - - with gr.Group(visible=False) as folder_upload_group: - file_uploader_folder = gr.File( - file_count="directory", - label="Select Folder" - ) - - upload_status = gr.Textbox( - label="Upload Status", - interactive=False - ) + with gr.Accordion("â„šī¸ How to Submit Data", open=False): + gr.Markdown(""" +### Submitting Evaluation Data - init_df, init_meta = update_leaderboard_table(initial_value) +**Data submissions happen via GitHub Pull Requests:** + +1. **Fork** [evaleval/every_eval_ever](https://github.com/evaleval/every_eval_ever) +2. **Add your JSON files** to `data////` +3. **Create a Pull Request** +4. **Automated validation** checks your data +5. **After merge**: GitHub Actions automatically syncs to HuggingFace +6. **Refresh this page** to see your data! + +#### File Structure +``` +data/ +└── YourBenchmark/ + └── developer_name/ + └── model_name/ + └── {uuid}.json +``` + +Each JSON file should follow the schema and be named with a unique UUID. + +📖 [**Full Submission Guide**](https://github.com/evaleval/every_eval_ever#contributor-guide) | +📋 [**JSON Schema**](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json) | +👀 [**See Examples**](https://github.com/evaleval/every_eval_ever/tree/main/data) + """) + + init_df, init_search_msg, init_header, init_metrics = update_leaderboard_table(initial_value) + + header_view = gr.HTML(value=init_header) - metadata_view = gr.HTML(value=init_meta) + search_info = gr.Markdown(value=init_search_msg) leaderboard_table = gr.Dataframe( value=init_df, @@ -541,59 +163,33 @@ with gr.Blocks(title="Eval Leaderboard", theme=theme, css=css) as demo: elem_classes="dataframe" ) - def toggle_upload_input(mode): - return { - file_upload_group: gr.Group(visible=(mode == "Files")), - folder_upload_group: gr.Group(visible=(mode == "Folder")) - } + metrics_view = gr.HTML(value=init_metrics) - upload_mode.change( - fn=toggle_upload_input, - inputs=[upload_mode], - outputs=[file_upload_group, folder_upload_group] - ) - - file_uploader_files.upload( - fn=handle_file_upload, - inputs=[file_uploader_files], - outputs=[leaderboard_selector, upload_status, file_uploader_files, file_uploader_folder] - ) - - file_uploader_folder.upload( - fn=handle_file_upload, - inputs=[file_uploader_folder], - outputs=[leaderboard_selector, upload_status, file_uploader_files, file_uploader_folder] - ) + # Event handlers leaderboard_selector.change( fn=update_leaderboard_table, - inputs=[leaderboard_selector, search_box, group_by_model], - outputs=[leaderboard_table, metadata_view] - ) - - search_box.change( - fn=update_leaderboard_table, - inputs=[leaderboard_selector, search_box, group_by_model], - outputs=[leaderboard_table, metadata_view] + inputs=[leaderboard_selector, search_box], + outputs=[leaderboard_table, search_info, header_view, metrics_view] ) - group_by_model.change( - fn=update_leaderboard_table, - inputs=[leaderboard_selector, search_box, group_by_model], - outputs=[leaderboard_table, metadata_view] + search_box.input( + fn=update_leaderboard_table, + inputs=[leaderboard_selector, search_box], + outputs=[leaderboard_table, search_info, header_view, metrics_view] ) refresh_btn.click( - fn=lambda: (gr.Dropdown(choices=get_available_leaderboards()), "Refreshed."), - outputs=[leaderboard_selector, upload_status] + fn=lambda: gr.Dropdown(choices=get_available_leaderboards()), + 
outputs=[leaderboard_selector] ).then( - fn=lambda: LEADERBOARD_CACHE.clear() + fn=lambda: clear_cache() ).then( fn=update_leaderboard_table, - inputs=[leaderboard_selector, search_box, group_by_model], - outputs=[leaderboard_table, metadata_view] + inputs=[leaderboard_selector, search_box], + outputs=[leaderboard_table, search_info, header_view, metrics_view] ) - + DATA_DIR.mkdir(exist_ok=True) if __name__ == "__main__": diff --git a/data_loader.py b/data_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..510e9e03d657bd0ebe9f27b9cc44bd47cf2e933a --- /dev/null +++ b/data_loader.py @@ -0,0 +1,317 @@ +""" +Data Loader: Load from HuggingFace, parse JSON files, and build tables. +""" +import json +import pandas as pd +from pathlib import Path +from datasets import load_dataset + + +# Global caches +HF_DATASET_CACHE = {} +LEADERBOARD_CACHE = {} +DATA_DIR = Path("leaderboard_data") + + +def load_hf_dataset_on_startup(): + """Load all splits from HuggingFace dataset at startup.""" + print("Loading dataset from HuggingFace...") + try: + dataset = load_dataset("deepmage121/eee_test") + + for split_name, split_data in dataset.items(): + print(f"Loading split: {split_name} ({len(split_data)} rows)") + + df = split_data.to_pandas() + parsed_items = [] + + for _, row in df.iterrows(): + evaluation_results = json.loads(row['evaluation_results']) + + results = {} + for eval_result in evaluation_results: + eval_name = eval_result.get("evaluation_name") + score = eval_result.get("score_details", {}).get("score") + if eval_name and score is not None: + results[eval_name] = score + + additional_details = {} + if pd.notna(row.get('additional_details')): + additional_details = json.loads(row['additional_details']) + + parsed_item = { + "leaderboard": row['_leaderboard'], + "provider": row['source_organization_name'], + "model": row['model_id'], + "developer": row['model_developer'], + "params": additional_details.get('params_billions'), + "architecture": additional_details.get('architecture', 'Unknown'), + "precision": additional_details.get('precision', 'Unknown'), + "results": results, + "raw_data": { + "schema_version": row['schema_version'], + "evaluation_id": row['evaluation_id'], + "retrieved_timestamp": row['retrieved_timestamp'], + "source_data": json.loads(row['source_data']), + "evaluation_source": { + "evaluation_source_name": row['evaluation_source_name'], + "evaluation_source_type": row['evaluation_source_type'] + }, + "source_metadata": { + "source_organization_name": row['source_organization_name'], + "evaluator_relationship": row['evaluator_relationship'], + }, + "model_info": { + "name": row['model_name'], + "id": row['model_id'], + "developer": row['model_developer'], + }, + "evaluation_results": evaluation_results, + "additional_details": additional_details + } + } + + if pd.notna(row.get('source_organization_url')): + parsed_item["raw_data"]["source_metadata"]["source_organization_url"] = row['source_organization_url'] + if pd.notna(row.get('source_organization_logo_url')): + parsed_item["raw_data"]["source_metadata"]["source_organization_logo_url"] = row['source_organization_logo_url'] + if pd.notna(row.get('model_inference_platform')): + parsed_item["raw_data"]["model_info"]["inference_platform"] = row['model_inference_platform'] + + parsed_items.append(parsed_item) + + HF_DATASET_CACHE[split_name] = parsed_items + + print(f"Loaded {len(HF_DATASET_CACHE)} leaderboard(s) from HuggingFace") + return True + except Exception as e: + print(f"Warning: Could not load 
HuggingFace dataset: {e}") + print("Falling back to local file system...") + return False + + +def parse_eval_json(file_path): + """Parses a single JSON file to extract model, provider, and results.""" + try: + with open(file_path, 'r') as f: + data = json.load(f) + + leaderboard_name = data.get("evaluation_source", {}).get("evaluation_source_name", "Unknown Leaderboard") + provider_name = data.get("source_metadata", {}).get("source_organization_name", "Unknown Provider") + model_id = data.get("model_info", {}).get("id", "Unknown Model") + developer_name = data.get("model_info", {}).get("developer", "Unknown Developer") + + params = data.get("model_info", {}).get("params_billions", None) + architecture = data.get("model_info", {}).get("architecture", "Unknown") + precision = data.get("additional_details", {}).get("precision", "Unknown") + if precision == "Unknown": + precision = data.get("model_info", {}).get("precision", "Unknown") + + results = {} + if "evaluation_results" in data: + for res in data["evaluation_results"]: + eval_name = res.get("evaluation_name", "Unknown Metric") + score = res.get("score_details", {}).get("score", None) + if score is not None: + results[eval_name] = score + + return { + "leaderboard": leaderboard_name, + "provider": provider_name, + "model": model_id, + "developer": developer_name, + "params": params, + "architecture": architecture, + "precision": precision, + "results": results, + "raw_data": data + } + except Exception as e: + print(f"Error parsing {file_path}: {e}") + return None + + +def get_available_leaderboards(): + """Returns available leaderboards from HF cache or local directory.""" + if HF_DATASET_CACHE: + return list(HF_DATASET_CACHE.keys()) + + if not DATA_DIR.exists(): + return [] + return [d.name for d in DATA_DIR.iterdir() if d.is_dir()] + + +def walk_eval_files(leaderboard_name): + """Generator that walks through Leaderboard directory recursively.""" + lb_path = DATA_DIR / leaderboard_name + if not lb_path.exists(): + return + yield from lb_path.rglob("*.json") + + +def get_eval_metadata(selected_leaderboard): + """Extracts evaluation metadata from the leaderboard data.""" + if not selected_leaderboard: + return {} + + eval_metadata = {"evals": {}, "source_info": {}} + + if selected_leaderboard in HF_DATASET_CACHE: + parsed_items = HF_DATASET_CACHE[selected_leaderboard] + if parsed_items: + parsed = parsed_items[0] + + source_meta = parsed["raw_data"].get("source_metadata", {}) + source_data_list = parsed["raw_data"].get("source_data", []) + url = source_data_list[0] if isinstance(source_data_list, list) and source_data_list else "#" + + eval_metadata["source_info"] = { + "organization": source_meta.get("source_organization_name", "Unknown"), + "relationship": source_meta.get("evaluator_relationship", "Unknown"), + "url": url + } + + if "evaluation_results" in parsed["raw_data"]: + for res in parsed["raw_data"]["evaluation_results"]: + eval_name = res.get("evaluation_name", "Unknown Metric") + if eval_name not in eval_metadata["evals"]: + metric_config = res.get("metric_config", {}) + eval_metadata["evals"][eval_name] = { + "description": metric_config.get("evaluation_description", "No description available"), + "score_type": metric_config.get("score_type", "unknown"), + "lower_is_better": metric_config.get("lower_is_better", False), + "min_score": metric_config.get("min_score"), + "max_score": metric_config.get("max_score"), + "level_names": metric_config.get("level_names", []), + "level_metadata": metric_config.get("level_metadata", 
[]), + "has_unknown_level": metric_config.get("has_unknown_level", False) + } + return eval_metadata + + # Fall back to file system + for json_file in walk_eval_files(selected_leaderboard): + parsed = parse_eval_json(json_file) + if parsed: + if not eval_metadata["source_info"]: + source_meta = parsed["raw_data"].get("source_metadata", {}) + source_data_list = parsed["raw_data"].get("source_data", []) + url = source_data_list[0] if isinstance(source_data_list, list) and source_data_list else "#" + + eval_metadata["source_info"] = { + "organization": source_meta.get("source_organization_name", "Unknown"), + "relationship": source_meta.get("evaluator_relationship", "Unknown"), + "url": url + } + + if "evaluation_results" in parsed["raw_data"]: + for res in parsed["raw_data"]["evaluation_results"]: + eval_name = res.get("evaluation_name", "Unknown Metric") + if eval_name not in eval_metadata["evals"]: + metric_config = res.get("metric_config", {}) + eval_metadata["evals"][eval_name] = { + "description": metric_config.get("evaluation_description", "No description available"), + "score_type": metric_config.get("score_type", "unknown"), + "lower_is_better": metric_config.get("lower_is_better", False), + "min_score": metric_config.get("min_score"), + "max_score": metric_config.get("max_score"), + "level_names": metric_config.get("level_names", []), + "level_metadata": metric_config.get("level_metadata", []), + "has_unknown_level": metric_config.get("has_unknown_level", False) + } + break + + return eval_metadata + + +def build_leaderboard_table(selected_leaderboard, search_query="", progress_callback=None): + """Builds the leaderboard DataFrame from cache or files.""" + if not selected_leaderboard: + return pd.DataFrame() + + if selected_leaderboard in LEADERBOARD_CACHE: + df, _ = LEADERBOARD_CACHE[selected_leaderboard] + else: + rows = [] + + if selected_leaderboard in HF_DATASET_CACHE: + if progress_callback: + progress_callback(0, desc=f"Loading {selected_leaderboard} from cache...") + + parsed_items = HF_DATASET_CACHE[selected_leaderboard] + + for i, parsed in enumerate(parsed_items): + if i % 100 == 0 and progress_callback: + progress_callback((i / len(parsed_items)), desc=f"Processing {selected_leaderboard}...") + + row = { + "Model": parsed["model"], + "Developer": parsed["developer"], + "Params (B)": parsed["params"], + "Arch": parsed["architecture"], + "Precision": parsed["precision"] + } + row.update(parsed["results"]) + rows.append(row) + else: + # Fall back to file system + if progress_callback: + progress_callback(0, desc=f"Scanning {selected_leaderboard}...") + + all_files = list(walk_eval_files(selected_leaderboard)) + total_files = len(all_files) + + for i, json_file in enumerate(all_files): + if i % 100 == 0 and progress_callback: + progress_callback((i / total_files), desc=f"Loading {selected_leaderboard}...") + + parsed = parse_eval_json(json_file) + if parsed: + row = { + "Model": parsed["model"], + "Developer": parsed["developer"], + "Params (B)": parsed["params"], + "Arch": parsed["architecture"], + "Precision": parsed["precision"] + } + row.update(parsed["results"]) + rows.append(row) + + if not rows: + df = pd.DataFrame(columns=["Model", "Developer", "Params (B)", "Arch", "Precision"]) + LEADERBOARD_CACHE[selected_leaderboard] = (df, None) + return df + + df = pd.DataFrame(rows) + df = df.dropna(axis=1, how='all') + + if df.empty: + LEADERBOARD_CACHE[selected_leaderboard] = (df, None) + return df + + numeric_cols = df.select_dtypes(include=['float', 'int']).columns + 
df[numeric_cols] = df[numeric_cols].round(3) + + # Add Average Score + eval_only_cols = [c for c in numeric_cols if c not in ["Params (B)"]] + if len(eval_only_cols) > 0: + df["Average"] = df[eval_only_cols].mean(axis=1).round(3) + + base_cols = ["Model", "Developer", "Params (B)", "Arch", "Precision", "Average"] + eval_cols = [c for c in df.columns if c not in base_cols] + base_cols = [c for c in base_cols if c in df.columns] + + final_cols = base_cols + sorted(eval_cols) + df = df[final_cols] + + if "Average" in df.columns: + df = df.sort_values("Average", ascending=False) + + LEADERBOARD_CACHE[selected_leaderboard] = (df, None) + + return df + + +def clear_cache(): + """Clears all caches.""" + LEADERBOARD_CACHE.clear() + diff --git a/eval.schema.json b/eval.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..4be0e6ff925ed642124d84e36d4e5e467c71060e --- /dev/null +++ b/eval.schema.json @@ -0,0 +1,282 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "version": "0.0.1", + "type": "object", + "description": "Schema for storing and validating LLMs evaluation data, including model configuration, prompts, instances, Output, and evaluation metrics", + "required": [ + "schema_version", + "evaluation_id", + "evaluation_source", + "retrieved_timestamp", + "source_data", + "source_metadata", + "model_info", + "evaluation_results" + ], + "properties": { + "schema_version": { + "type": "string", + "description": "Version of the schema used for this evaluation data" + }, + "evaluation_id": { + "type": "string", + "description": "Unique identifier for this specific evaluation run. Use org_name/eval_name/retrieved_timestamp format" + }, + "retrieved_timestamp": { + "type": "string", + "description": "Timestamp for when this record was created" + }, + "source_data": { + "type": "array", + "description": "URLs for the source of the evaluation data", + "items": { + "type": "string" + } + }, + "evaluation_source": { + "type": "object", + "description": "Details about evaluation origin. There are options that evaluations come from leaderboards (e.g. Live Code Bench Pro) or evaluation platforms (e.g. lm-eval, inspect ai, HELM...).", + "required": [ + "evaluation_source_name", + "evaluation_source_type" + ], + "properties": { + "evaluation_source_name": { + "type": "string", + "description": "Name of the source (e.g. title of the source leaderboard or name of the platform used for the evaluation." 
+ }, + "evaluation_source_type": { + "type": "string", + "enum": [ + "leaderboard", + "evaluation_platform" + ], + "description": "Type of evaluation source, e.g., leaderboard or evaluation platform" + } + } + }, + "source_metadata": { + "type": "object", + "description": "Metadata about the source of the leaderboard data", + "required": [ + "source_organization_name", + "evaluator_relationship" + ], + "properties": { + "source_organization_name": { + "type": "string", + "description": "Name of the organization that provides the data" + }, + "source_organization_url": { + "type": "string", + "description": "URL for the organization that provides the data" + }, + "source_organization_logo_url": { + "type": "string", + "description": "URL for the Logo for the organization that provides the data" + }, + "evaluator_relationship": { + "type": "string", + "description": "Relationship between the evaluator and the model", + "enum": [ + "first_party", + "third_party", + "collaborative", + "other" + ] + } + } + }, + "model_info": { + "type": "object", + "description": "Complete model specification including basic information, technical configuration and inference settings", + "required": [ + "name", + "id" + ], + "properties": { + "name": { + "type": "string", + "description": "Model name provided by evaluation source" + }, + "id": { + "type": "string", + "description": "Model name standarized to HuggingFace format (e.g. meta-llama/Llama-3.1-8B-Instruct)" + }, + "developer": { + "type": "string", + "description": "Name of organization that provides the model (e.g. 'OpenAI')" + }, + "inference_platform": { + "type": "string", + "description": "Description of platform used to run the evaluations (e.g. local machine, Bedrock)" + } + } + }, + "evaluation_results": { + "type": "array", + "description": "Array of evaluation results", + "items": { + "type": "object", + "required": [ + "evaluation_name", + "metric_config", + "score_details" + ], + "properties": { + "evaluation_name": { + "type": "string", + "description": "Name of the evaluation" + }, + "evaluation_timestamp": { + "type": "string", + "description": "Timestamp for when the evaluations were run" + }, + "metric_config": { + "type": "object", + "description": "Details about the metric", + "required": [ + "lower_is_better" + ], + "properties": { + "evaluation_description": { + "type": "string", + "description": "Description of the evaluation" + }, + "lower_is_better": { + "type": "boolean", + "description": "Whether a lower score is better" + }, + "score_type": { + "type": "string", + "description": "Type of score", + "enum": [ + "binary", + "continuous", + "levels" + ] + }, + "level_names": { + "type": "array", + "description": "Names of the score levels", + "items": { + "type": "string" + } + }, + "level_metadata": { + "type": "array", + "description": "Additional Description for each Score Level", + "items": { + "type": "string" + } + }, + "has_unknown_level": { + "type": "boolean", + "description": "Indicates whether there is an Unknown Level - if True, then a score of -1 will be treated as Unknown" + }, + "min_score": { + "type": "number", + "description": "Minimum possible score for continuous metric" + }, + "max_score": { + "type": "number", + "description": "Maximum possible score for continuous metric" + } + }, + "if": { + "properties": { + "score_type": { + "const": "levels" + } + } + }, + "then": { + "required": [ + "level_names", + "has_unknown_level" + ] + }, + "else": { + "if": { + "properties": { + "score_type": { + "const": 
"continuous" + } + } + }, + "then": { + "required": [ + "min_score", + "max_score" + ] + } + } + }, + "score_details": { + "type": "object", + "description": "The score for the evaluation and related details", + "required": [ + "score" + ], + "properties": { + "score": { + "type": "number", + "description": "The score for the evaluation" + }, + "details": { + "type": "object", + "description": "Any additional details about the score", + "additionalProperties": true + } + } + }, + "detailed_evaluation_results_url": { + "type": "string", + "description": "Link to detailed evaluation data" + }, + "generation_config": { + "type": "object", + "generation_args": { + "type": "object", + "description": "Parameters used to generate results - properties may vary by model type", + "properties": { + "temperature": { + "type": [ + "null", + "number" + ], + "description": "Sampling temperature" + }, + "top_p": { + "type": [ + "null", + "number" + ], + "description": "Nucleus sampling parameter" + }, + "top_k": { + "type": [ + "null", + "number" + ], + "description": "Top-k sampling parameter" + }, + "max_tokens": { + "type": "integer", + "minimum": 1, + "description": "Maximum number of tokens to generate" + } + }, + "additionalProperties": true + }, + "additional_details": { + "type": "string", + "description": "Additional details about how the results for this metric were generated." + } + } + } + } + + } + } +} diff --git a/hf_operations.py b/hf_operations.py new file mode 100644 index 0000000000000000000000000000000000000000..18d0fc4eb6b2e1cc40336d3686ddd027adeaac66 --- /dev/null +++ b/hf_operations.py @@ -0,0 +1,202 @@ +""" +HuggingFace Operations: Upload data, create PRs, validate schemas. +""" +from huggingface_hub import HfApi, login +import pandas as pd +import json +from pathlib import Path +from jsonschema import validate, ValidationError, Draft7Validator + + +# Load schema once at module level +SCHEMA_PATH = Path(__file__).parent / "eval.schema.json" +with open(SCHEMA_PATH, 'r') as f: + EVAL_SCHEMA = json.load(f) + + +def validate_json_against_schema(json_data): + """ + Validate a JSON object against eval.schema.json. + + Args: + json_data: Dict containing the evaluation data + + Returns: + (bool, str): (is_valid, error_message) + """ + try: + validate(instance=json_data, schema=EVAL_SCHEMA) + return True, "Schema validation passed" + except ValidationError as e: + # Extract the most relevant error message + error_path = " → ".join(str(p) for p in e.path) if e.path else "root" + return False, f"❌ Schema validation failed at '{error_path}': {e.message}" + except Exception as e: + return False, f"❌ Validation error: {str(e)}" + + +def upload_to_hf_dataset(parquet_file, split_name, repo_id="deepmage121/eee_test"): + """ + Upload a parquet file as a new split to the HF dataset. + + Args: + parquet_file: Path to parquet file + split_name: Name of the split (leaderboard name) + repo_id: HuggingFace dataset repository ID + """ + # TODO: Implement upload logic + pass + + +def check_hf_authentication(): + """ + Check if user is authenticated with HuggingFace. + + Returns: + (bool, str): (is_authenticated, username or error_message) + """ + try: + api = HfApi() + user_info = api.whoami() + return True, user_info['name'] + except Exception as e: + return False, "Not authenticated. Run: huggingface-cli login" + + +def check_duplicate_pr_exists(leaderboard_name, repo_id="deepmage121/eee_test"): + """ + Check if a PR already exists for this leaderboard. 
+ + Args: + leaderboard_name: Name of the leaderboard + repo_id: HuggingFace dataset repository ID + + Returns: + (bool, str or None): (exists, pr_url if exists) + """ + try: + api = HfApi() + discussions = api.get_repo_discussions(repo_id=repo_id, repo_type="dataset") + + # Check for open PRs with matching title + pr_title_pattern = f"add new leaderboard: {leaderboard_name.lower()}" + for discussion in discussions: + if discussion.is_pull_request and discussion.status == "open": + if pr_title_pattern in discussion.title.lower(): + pr_url = f"https://huggingface.co/datasets/{repo_id}/discussions/{discussion.num}" + return True, pr_url + + return False, None + except Exception as e: + # If we can't check, assume no duplicate (fail open) + print(f"Warning: Could not check for duplicate PRs: {e}") + return False, None + + +def create_pr_for_new_leaderboard(leaderboard_name, parquet_file, repo_id="deepmage121/eee_test"): + """ + Create a pull request to add a new leaderboard split. + + Args: + leaderboard_name: Name of the new leaderboard + parquet_file: Path to parquet file + repo_id: HuggingFace dataset repository ID + + Returns: + (success, pr_url or error_message) + """ + # 1. Check authentication + is_auth, auth_result = check_hf_authentication() + if not is_auth: + return False, f"❌ {auth_result}" + + # 2. Check for duplicate PR + has_duplicate, duplicate_url = check_duplicate_pr_exists(leaderboard_name, repo_id) + if has_duplicate: + return False, f"âš ī¸ PR already exists: {duplicate_url}" + + # 3. Validate parquet file exists and has data + parquet_path = Path(parquet_file) + if not parquet_path.exists(): + return False, "❌ Parquet file not found" + + df = pd.read_parquet(parquet_file) + if len(df) == 0: + return False, "❌ Parquet file is empty" + + # 4. Create PR + try: + api = HfApi() + + # Upload the parquet file to the branch + commit_message = f"Add new leaderboard: {leaderboard_name}" + + # Upload file and create PR + commit_info = api.upload_file( + path_or_fileobj=parquet_file, + path_in_repo=f"data/{leaderboard_name}.parquet", + repo_id=repo_id, + repo_type="dataset", + commit_message=commit_message, + create_pr=True, + ) + + # Extract PR URL from commit info + pr_url = commit_info.pr_url if hasattr(commit_info, 'pr_url') else f"https://huggingface.co/datasets/{repo_id}/discussions" + + return True, f"PR created ({len(df)} rows): {pr_url}" + + except Exception as e: + return False, f"❌ Failed to create PR: {str(e)}" + + +def validate_schema(parquet_file): + """ + Validate that a parquet file matches the expected schema. 
+ + Args: + parquet_file: Path to parquet file to validate + + Returns: + (bool, str): (is_valid, error_message) + """ + try: + df = pd.read_parquet(parquet_file) + + # Required columns + required_cols = [ + '_leaderboard', '_developer', '_model', '_uuid', + 'schema_version', 'evaluation_id', 'retrieved_timestamp', + 'source_data', 'evaluation_source_name', 'evaluation_source_type', + 'source_organization_name', 'evaluator_relationship', + 'model_name', 'model_id', 'model_developer', + 'evaluation_results' + ] + + missing = [col for col in required_cols if col not in df.columns] + if missing: + return False, f"Missing required columns: {', '.join(missing)}" + + # Check data types (all should be strings) + for col in df.columns: + if df[col].dtype not in ['object', 'string']: + return False, f"Column '{col}' has wrong type: {df[col].dtype} (expected string)" + + return True, "Schema validation passed" + + except Exception as e: + return False, f"Validation error: {str(e)}" + + +def export_to_json(parquet_file, output_dir): + """ + Export parquet data back to JSON files. + Uses the parquet_to_folder function from json_to_parquet.py + + Args: + parquet_file: Path to parquet file + output_dir: Directory to write JSON files to + """ + from json_to_parquet import parquet_to_folder + parquet_to_folder(parquet_file, output_dir) + diff --git a/json_to_parquet.py b/json_to_parquet.py new file mode 100644 index 0000000000000000000000000000000000000000..f1b701fa8a354831c11579e49042235dd8982b94 --- /dev/null +++ b/json_to_parquet.py @@ -0,0 +1,228 @@ + + +import json +from pathlib import Path +import pandas as pd + + +def json_to_row(json_path: Path) -> dict: + """Convert one JSON to a single row (1 JSON = 1 row, evaluations as columns).""" + with open(json_path, 'r') as f: + data = json.load(f) + + required_fields = ["schema_version", "evaluation_id", "evaluation_source", "retrieved_timestamp", + "source_data", "source_metadata", "model_info", "evaluation_results"] + for field in required_fields: + if field not in data: + raise ValueError(f"{json_path}: Missing required field '{field}'") + + if "evaluation_source_name" not in data["evaluation_source"]: + raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_name'") + if "evaluation_source_type" not in data["evaluation_source"]: + raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_type'") + + if "source_organization_name" not in data["source_metadata"]: + raise ValueError(f"{json_path}: Missing required field 'source_metadata.source_organization_name'") + if "evaluator_relationship" not in data["source_metadata"]: + raise ValueError(f"{json_path}: Missing required field 'source_metadata.evaluator_relationship'") + + if "name" not in data["model_info"]: + raise ValueError(f"{json_path}: Missing required field 'model_info.name'") + if "id" not in data["model_info"]: + raise ValueError(f"{json_path}: Missing required field 'model_info.id'") + if "developer" not in data["model_info"]: + raise ValueError(f"{json_path}: Missing required field 'model_info.developer'") + + leaderboard = data["evaluation_source"]["evaluation_source_name"] + model = data["model_info"]["id"] + uuid = json_path.stem + developer = data["model_info"]["developer"] + + # Validate evaluation results + for eval_result in data["evaluation_results"]: + if "evaluation_name" not in eval_result: + raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].evaluation_name'") + if "metric_config" 
not in eval_result: + raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config'") + if "score_details" not in eval_result: + raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details'") + + if "lower_is_better" not in eval_result["metric_config"]: + raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config.lower_is_better'") + if "score" not in eval_result["score_details"]: + raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details.score'") + + row = { + # Folder structure (for reconstruction) + "_leaderboard": leaderboard, + "_developer": developer, + "_model": model, + "_uuid": uuid, + + # Required top-level fields + "schema_version": data["schema_version"], + "evaluation_id": data["evaluation_id"], + "retrieved_timestamp": data["retrieved_timestamp"], + "source_data": json.dumps(data["source_data"]), + + # Required nested fields + "evaluation_source_name": data["evaluation_source"]["evaluation_source_name"], + "evaluation_source_type": data["evaluation_source"]["evaluation_source_type"], + + "source_organization_name": data["source_metadata"]["source_organization_name"], + "source_organization_url": data["source_metadata"].get("source_organization_url"), + "source_organization_logo_url": data["source_metadata"].get("source_organization_logo_url"), + "evaluator_relationship": data["source_metadata"]["evaluator_relationship"], + + "model_name": data["model_info"]["name"], + "model_id": data["model_info"]["id"], + "model_developer": data["model_info"]["developer"], + "model_inference_platform": data["model_info"].get("inference_platform"), + + # Store full evaluation_results and additional_details as JSON + "evaluation_results": json.dumps(data["evaluation_results"]), + "additional_details": json.dumps(data["additional_details"]) if "additional_details" in data else None, + } + + return row + + +def add_to_parquet(json_or_folder: str, parquet_file: str): + """ + Add JSON(s) to Parquet file. + Creates new file if it doesn't exist, appends and deduplicates if it does. 
+ + Args: + json_or_folder: Path to single JSON file or folder containing JSONs + parquet_file: Output Parquet file path + """ + input_path = Path(json_or_folder) + + if input_path.is_file(): + json_files = [input_path] + elif input_path.is_dir(): + json_files = list(input_path.rglob("*.json")) + if not json_files: + raise ValueError(f"No JSON files found in directory: {json_or_folder}") + else: + raise ValueError(f"Invalid input: {json_or_folder}") + + print(f"Processing {len(json_files)} JSON file(s)...") + + parquet_path = Path(parquet_file) + if parquet_path.exists(): + existing_df = pd.read_parquet(parquet_file) + existing_keys = set( + existing_df[["_leaderboard", "_developer", "_model", "_uuid"]] + .apply(tuple, axis=1) + ) + print(f"Found {len(existing_df)} existing rows") + else: + existing_df = None + existing_keys = set() + + all_rows = [] + skipped = 0 + for i, jf in enumerate(json_files, 1): + if i % 100 == 0: + print(f" {i}/{len(json_files)}") + + row = json_to_row(jf) + key = (row["_leaderboard"], row["_developer"], row["_model"], row["_uuid"]) + if key not in existing_keys: + all_rows.append(row) + existing_keys.add(key) + else: + skipped += 1 + + if skipped > 0: + print(f" Skipped {skipped} duplicate file(s)") + + # Handle case where no new rows to add + if not all_rows: + if existing_df is not None: + print(f"No new files to add, keeping existing {len(existing_df)} file(s)") + return + else: + raise ValueError("No valid JSON files to process and no existing parquet file") + + new_df = pd.DataFrame(all_rows) + + if existing_df is not None: + df = pd.concat([existing_df, new_df], ignore_index=True) + print(f"Added {len(new_df)} new file(s) to existing {len(existing_df)} file(s)") + else: + df = new_df + + df.to_parquet(parquet_file, index=False) + print(f"Saved {len(df)} total file(s) to {parquet_file} ({parquet_path.stat().st_size / 1024 / 1024:.1f} MB)") + + +def parquet_to_folder(parquet_file: str, output_dir: str): + """Reconstruct folder structure from Parquet.""" + df = pd.read_parquet(parquet_file) + out = Path(output_dir) + + for _, row in df.iterrows(): + lb = row["_leaderboard"] + dev = row["_developer"] + model = row["_model"] + uuid = row["_uuid"] + + json_data = { + "schema_version": row["schema_version"], + "evaluation_id": row["evaluation_id"], + "retrieved_timestamp": row["retrieved_timestamp"], + "source_data": json.loads(row["source_data"]), + "evaluation_source": { + "evaluation_source_name": row["evaluation_source_name"], + "evaluation_source_type": row["evaluation_source_type"] + }, + "source_metadata": { + "source_organization_name": row["source_organization_name"], + "evaluator_relationship": row["evaluator_relationship"] + }, + "model_info": { + "name": row["model_name"], + "id": row["model_id"], + "developer": row["model_developer"] + }, + "evaluation_results": json.loads(row["evaluation_results"]) + } + + if pd.notna(row["source_organization_url"]): + json_data["source_metadata"]["source_organization_url"] = row["source_organization_url"] + if pd.notna(row["source_organization_logo_url"]): + json_data["source_metadata"]["source_organization_logo_url"] = row["source_organization_logo_url"] + + if pd.notna(row["model_inference_platform"]): + json_data["model_info"]["inference_platform"] = row["model_inference_platform"] + + if pd.notna(row["additional_details"]): + json_data["additional_details"] = json.loads(row["additional_details"]) + + file_path = out / lb / dev / model / f"{uuid}.json" + file_path.parent.mkdir(parents=True, exist_ok=True) + with 
open(file_path, 'w') as f: + json.dump(json_data, f, indent=2) + + print(f"Reconstructed {len(df)} files to {output_dir}") + + +if __name__ == "__main__": + import sys + + if len(sys.argv) < 2: + print("Usage:") + print(" python json_to_parquet.py add ") + print(" python json_to_parquet.py export ") + sys.exit(1) + + cmd = sys.argv[1] + + if cmd == "add": + add_to_parquet(sys.argv[2], sys.argv[3]) + elif cmd == "export": + parquet_to_folder(sys.argv[2], sys.argv[3]) + else: + print(f"Unknown command: {cmd}") diff --git a/leaderboard_data/HFOpenLLMv2/0-hero/0-hero_Matter-0.2-7B-DPO/40e80d5e-db72-46b7-bd14-b7d005df4be8.json b/leaderboard_data/HFOpenLLMv2/0-hero/0-hero_Matter-0.2-7B-DPO/40e80d5e-db72-46b7-bd14-b7d005df4be8.json deleted file mode 100644 index 13d42abffb5b6dec5b881d249e70ecf1598aaeae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/0-hero/0-hero_Matter-0.2-7B-DPO/40e80d5e-db72-46b7-bd14-b7d005df4be8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/0-hero_Matter-0.2-7B-DPO/1762652579.4626381", - "retrieved_timestamp": "1762652579.462642", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "0-hero/Matter-0.2-7B-DPO", - "developer": "0-hero", - "inference_platform": "unknown", - "id": "0-hero/Matter-0.2-7B-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3302792147058693 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3596254301656297 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.381375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163563829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-32K/0d91a153-1b6b-4891-8722-a5c7e372ba64.json 
b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-32K/0d91a153-1b6b-4891-8722-a5c7e372ba64.json deleted file mode 100644 index 80547e421e154b508a24aebfce93d4238b937691..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-32K/0d91a153-1b6b-4891-8722-a5c7e372ba64.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-32K/1762652579.463656", - "retrieved_timestamp": "1762652579.463657", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-1.5-34B-32K", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-34B-32K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3118691737922047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6015685776542417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4398229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4709109042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat-16K/2192007d-1f6e-4f74-b518-7448ef3a896e.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat-16K/2192007d-1f6e-4f74-b518-7448ef3a896e.json deleted file mode 100644 index d0dd58d6ee88c61b554b2e20e6cd035cf66c34a4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat-16K/2192007d-1f6e-4f74-b518-7448ef3a896e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-Chat-16K/1762652579.464125", - "retrieved_timestamp": "1762652579.4641259", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-1.5-34B-Chat-16K", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-34B-Chat-16K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.456449997118756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6100218256499571 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21374622356495468 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43976041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45445478723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat/e335874b-9b3e-4966-a7e0-22e9d16f8324.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat/e335874b-9b3e-4966-a7e0-22e9d16f8324.json deleted file mode 100644 index e872cb56c27fa3d3098d01803b28e9be7dbd8b1c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat/e335874b-9b3e-4966-a7e0-22e9d16f8324.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-Chat/1762652579.463886", - "retrieved_timestamp": "1762652579.4638872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-1.5-34B-Chat", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-34B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6066758423205982 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6083748310271819 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.277190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4281979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45204454787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B/8409c158-ef12-4e6c-8a1d-7be2084b3446.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B/8409c158-ef12-4e6c-8a1d-7be2084b3446.json deleted file mode 100644 index 09588b7620fa9279af91f885eb3775bc4b3ee9f9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B/8409c158-ef12-4e6c-8a1d-7be2084b3446.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B/1762652579.4633532", - "retrieved_timestamp": "1762652579.463354", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-1.5-34B", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-34B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2841172533322695 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5976391706360018 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36577181208053694 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4236041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4665890957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B-Chat/3452e57f-3023-4e2e-ad84-b09e409fe334.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B-Chat/3452e57f-3023-4e2e-ad84-b09e409fe334.json deleted file mode 100644 index 7d05d24e5b234d149a660e3f6fcf983a780fdcb5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B-Chat/3452e57f-3023-4e2e-ad84-b09e409fe334.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-6B-Chat/1762652579.464571", - "retrieved_timestamp": "1762652579.464572", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-1.5-6B-Chat", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-6B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5145270105542183 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4571311331954389 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1623867069486405 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43917708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3193151595744681 - } - } - ], - "additional_details": { - "precision": 
"bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B/1a1f1263-96b6-4e32-a2c8-6c0d6b47dff9.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B/1a1f1263-96b6-4e32-a2c8-6c0d6b47dff9.json deleted file mode 100644 index 802fda80e1c556a6e6c2f86cb20923f65ca6c80f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B/1a1f1263-96b6-4e32-a2c8-6c0d6b47dff9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-6B/1762652579.464354", - "retrieved_timestamp": "1762652579.464355", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-1.5-6B", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-6B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26166017278598563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44925820198929056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43740625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31441156914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-32K/df9d9d44-daa1-4e61-9b46-192380043889.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-32K/df9d9d44-daa1-4e61-9b46-192380043889.json deleted file mode 100644 index 9e8fb948cee6a95485a44cadd575e54f755f41a7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-32K/df9d9d44-daa1-4e61-9b46-192380043889.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-32K/1762652579.4649951", - "retrieved_timestamp": 
"1762652579.464996", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-1.5-9B-32K", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-9B-32K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23031113002389217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.496332115988265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37649601063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat-16K/090c9691-4b7e-4a98-b9a2-644e21797be4.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat-16K/090c9691-4b7e-4a98-b9a2-644e21797be4.json deleted file mode 100644 index 9f8095e138984210fb35cd26340a79758a1b12a6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat-16K/090c9691-4b7e-4a98-b9a2-644e21797be4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-Chat-16K/1762652579.465471", - "retrieved_timestamp": "1762652579.465471", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-1.5-9B-Chat-16K", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-9B-Chat-16K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4214040966856829 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5153383364651778 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40990624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39935172872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat/9256c32b-d956-418f-97da-ea78e3ad9e48.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat/9256c32b-d956-418f-97da-ea78e3ad9e48.json deleted file mode 100644 index ff3ac391bcccc9fe3eee9293aeedb40aff1fb3bc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat/9256c32b-d956-418f-97da-ea78e3ad9e48.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-Chat/1762652579.465226", - "retrieved_timestamp": "1762652579.465226", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-1.5-9B-Chat", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-9B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6045525871354672 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.555906430281685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2258308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42590625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39752327127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B/904d1f91-3153-49d5-afd3-9921bfc086f1.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B/904d1f91-3153-49d5-afd3-9921bfc086f1.json deleted file mode 100644 index 9a4e6bc7a3662a1c66d580025c652df1dae25728..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B/904d1f91-3153-49d5-afd3-9921bfc086f1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B/1762652579.464781", - "retrieved_timestamp": "1762652579.464782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-1.5-9B", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-1.5-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29358435617494916 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.514294179104191 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43278124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3916223404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-200K/fb2ebd9a-f5b8-42a2-9b58-e6f0e7d9b98a.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-200K/fb2ebd9a-f5b8-42a2-9b58-e6f0e7d9b98a.json deleted file mode 100644 index 5655666ddc0b371f6f2d8b95b176f1b8807ad32f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-200K/fb2ebd9a-f5b8-42a2-9b58-e6f0e7d9b98a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B-200K/1762652579.465893", - "retrieved_timestamp": "1762652579.465894", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-34B-200K", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-34B-200K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15424850507763843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5441817925289527 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171874999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45345744680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-Chat/5d9b9217-874b-426d-8af4-5105a3b1b3ad.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-Chat/5d9b9217-874b-426d-8af4-5105a3b1b3ad.json deleted file mode 100644 index b31034cb091a3f033e623a876161ecd928080812..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-Chat/5d9b9217-874b-426d-8af4-5105a3b1b3ad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B-Chat/1762652579.466115", - "retrieved_timestamp": "1762652579.4661162", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-34B-Chat", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-34B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4698887839820565 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5560872910766164 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39784375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4093251329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B/3ebcbf3d-cb2d-4332-bb8a-1db104033391.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B/3ebcbf3d-cb2d-4332-bb8a-1db104033391.json deleted file mode 100644 index b6ff74f37afa9da113b821db4eecd9ebd7f9877f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B/3ebcbf3d-cb2d-4332-bb8a-1db104033391.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B/1762652579.4656792", - "retrieved_timestamp": "1762652579.46568", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-34B", - "developer": "01-ai", - "inference_platform": 
"unknown", - "id": "01-ai/Yi-34B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3045751938190667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5457099951794562 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36661073825503354 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4118541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.441156914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-200K/6b720e8b-aab8-4ba4-9bce-e7a1de3cfb86.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-200K/6b720e8b-aab8-4ba4-9bce-e7a1de3cfb86.json deleted file mode 100644 index 992afa07503d5e311dac51ec373d85c721355d5c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-200K/6b720e8b-aab8-4ba4-9bce-e7a1de3cfb86.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B-200K/1762652579.4665558", - "retrieved_timestamp": "1762652579.466557", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-6B-200K", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-6B-200K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08433068702154728 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42892948109603307 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45873958333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2844082446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-Chat/1120c801-7736-4d9d-b23d-08eeedb34186.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-Chat/1120c801-7736-4d9d-b23d-08eeedb34186.json deleted file mode 100644 index 791c74d16759b5d384bb9850098575eee927cdbc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-Chat/1120c801-7736-4d9d-b23d-08eeedb34186.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B-Chat/1762652579.466805", - "retrieved_timestamp": "1762652579.466806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-6B-Chat", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-6B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33952135888331847 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41326019207548687 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36879166666666663 - } - 
}, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3061003989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B/297419fa-855c-4eae-ad7c-3cf4a0262450.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B/297419fa-855c-4eae-ad7c-3cf4a0262450.json deleted file mode 100644 index fc94c92f131b6fd66dc3adf0ee7fbc6cb5d1e6d1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B/297419fa-855c-4eae-ad7c-3cf4a0262450.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B/1762652579.4663382", - "retrieved_timestamp": "1762652579.4663382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-6B", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-6B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28933784580468713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4309230591000865 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39368749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29911901595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B-200K/4299df04-495a-4687-b143-96b1b562d5e8.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B-200K/4299df04-495a-4687-b143-96b1b562d5e8.json deleted file mode 100644 index 
08d337e8165a0cb928d3e015bec507e4bbabb20b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B-200K/4299df04-495a-4687-b143-96b1b562d5e8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-9B-200K/1762652579.467233", - "retrieved_timestamp": "1762652579.467233", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-9B-200K", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-9B-200K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23270921155866434 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4793302602023641 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42940625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36220079787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B/0ec59add-f9a9-4dbd-8a83-c6aec0b8ad21.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B/0ec59add-f9a9-4dbd-8a83-c6aec0b8ad21.json deleted file mode 100644 index 6bafb1fe4e4497ec973917b08944bbbb864902dd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B/0ec59add-f9a9-4dbd-8a83-c6aec0b8ad21.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-9B/1762652579.46702", - "retrieved_timestamp": "1762652579.4670231", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - 
"model_info": { - "name": "01-ai/Yi-9B", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2708779372066118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49396075125308075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40540624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35738031914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-Coder-9B-Chat/ef0cc3a5-0d62-4a45-b0c7-28a6f7dfdac4.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-Coder-9B-Chat/ef0cc3a5-0d62-4a45-b0c7-28a6f7dfdac4.json deleted file mode 100644 index ea42c85e8c6e20238dd372bf7561d806af618d60..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-Coder-9B-Chat/ef0cc3a5-0d62-4a45-b0c7-28a6f7dfdac4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/01-ai_Yi-Coder-9B-Chat/1762652579.4674509", - "retrieved_timestamp": "1762652579.4674518", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "01-ai/Yi-Coder-9B-Chat", - "developer": "01-ai", - "inference_platform": "unknown", - "id": "01-ai/Yi-Coder-9B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4817041006750976 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.48142000339111674 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3991770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24251994680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/1-800-LLMs/1-800-LLMs_Qwen-2.5-14B-Hindi-Custom-Instruct/a48b0864-76b7-4860-a448-942a8d74f68e.json b/leaderboard_data/HFOpenLLMv2/1-800-LLMs/1-800-LLMs_Qwen-2.5-14B-Hindi-Custom-Instruct/a48b0864-76b7-4860-a448-942a8d74f68e.json deleted file mode 100644 index 6bc93d8c248fcee6de4f98e6940a93b0d8ab8e24..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/1-800-LLMs/1-800-LLMs_Qwen-2.5-14B-Hindi-Custom-Instruct/a48b0864-76b7-4860-a448-942a8d74f68e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/1-800-LLMs_Qwen-2.5-14B-Hindi-Custom-Instruct/1762652579.468073", - "retrieved_timestamp": "1762652579.468074", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct", - "developer": "1-800-LLMs", - "inference_platform": "unknown", - "id": "1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30774677854758703 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6284322714967584 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4490625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.516373005319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/152334H/152334H_miqu-1-70b-sf/f57d7b8d-85d5-4e0b-8dec-31e2931487dd.json b/leaderboard_data/HFOpenLLMv2/152334H/152334H_miqu-1-70b-sf/f57d7b8d-85d5-4e0b-8dec-31e2931487dd.json deleted file mode 100644 index 8b265339c8235a8aeb6c70e5c84c8ccead9aa3cc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/152334H/152334H_miqu-1-70b-sf/f57d7b8d-85d5-4e0b-8dec-31e2931487dd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/152334H_miqu-1-70b-sf/1762652579.469194", - "retrieved_timestamp": "1762652579.469195", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "152334H/miqu-1-70b-sf", - "developer": "152334H", - "inference_platform": "unknown", - "id": "152334H/miqu-1-70b-sf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5181740005407873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6102361685099691 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45820833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42278922872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 68.977 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-7B-v0.1/1347cd1b-2ebc-4223-900f-7c2479e228a3.json b/leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-7B-v0.1/1347cd1b-2ebc-4223-900f-7c2479e228a3.json deleted file mode 100644 index 4e3d74e80449b3f2267bcf508b5c2ee7c169702f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-7B-v0.1/1347cd1b-2ebc-4223-900f-7c2479e228a3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/1TuanPham_T-VisStar-7B-v0.1/1762652579.469481", - "retrieved_timestamp": "1762652579.469482", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "1TuanPham/T-VisStar-7B-v0.1", - "developer": "1TuanPham", - "inference_platform": "unknown", - "id": "1TuanPham/T-VisStar-7B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36070404305021786 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5052203113352468 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3210605053191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.294 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-v0.1/b2926dd6-628c-4274-b0e8-1efc64269bb2.json b/leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-v0.1/b2926dd6-628c-4274-b0e8-1efc64269bb2.json deleted file mode 100644 index c479ea18409a706ef7ef8eba7db256cdc3459334..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-v0.1/b2926dd6-628c-4274-b0e8-1efc64269bb2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/1TuanPham_T-VisStar-v0.1/1762652579.469921", - "retrieved_timestamp": "1762652579.469923", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "1TuanPham/T-VisStar-v0.1", - "developer": "1TuanPham", - "inference_platform": "unknown", - "id": "1TuanPham/T-VisStar-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36070404305021786 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5052203113352468 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3210605053191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.294 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/3rd-Degree-Burn/3rd-Degree-Burn_L-3.1-Science-Writer-8B/0c4fd071-b5c9-4bf1-a1d5-d658be1a3258.json b/leaderboard_data/HFOpenLLMv2/3rd-Degree-Burn/3rd-Degree-Burn_L-3.1-Science-Writer-8B/0c4fd071-b5c9-4bf1-a1d5-d658be1a3258.json deleted file mode 100644 index d5171a37caa6daa46fba3351cd16213633d0a162..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/3rd-Degree-Burn/3rd-Degree-Burn_L-3.1-Science-Writer-8B/0c4fd071-b5c9-4bf1-a1d5-d658be1a3258.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_L-3.1-Science-Writer-8B/1762652579.470164", - "retrieved_timestamp": "1762652579.470165", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "3rd-Degree-Burn/L-3.1-Science-Writer-8B", - "developer": "3rd-Degree-Burn", - 
"inference_platform": "unknown", - "id": "3rd-Degree-Burn/L-3.1-Science-Writer-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42625012743963797 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5041306326216103 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3959479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36494348404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/4season/4season_final_model_test_v2/74973e37-cd82-4e8a-816a-02b035fabff4.json b/leaderboard_data/HFOpenLLMv2/4season/4season_final_model_test_v2/74973e37-cd82-4e8a-816a-02b035fabff4.json deleted file mode 100644 index 864adc997ad1c4dbbc4a39bad72c6909d695c171..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/4season/4season_final_model_test_v2/74973e37-cd82-4e8a-816a-02b035fabff4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/4season_final_model_test_v2/1762652579.4714398", - "retrieved_timestamp": "1762652579.4714408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "4season/final_model_test_v2", - "developer": "4season", - "inference_platform": "unknown", - "id": "4season/final_model_test_v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3191132860809319 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.6342049783295018 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4314479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528091755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.421 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-Instruct-preview/3766e8a0-99ad-4733-a01b-ced446b15eda.json b/leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-Instruct-preview/3766e8a0-99ad-4733-a01b-ced446b15eda.json deleted file mode 100644 index aa5ed6ea3e10809249d9a3d226a303a3d8e47760..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-Instruct-preview/3766e8a0-99ad-4733-a01b-ced446b15eda.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AALF_FuseChat-Llama-3.1-8B-Instruct-preview/1762652579.471838", - "retrieved_timestamp": "1762652579.471839", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview", - "developer": "AALF", - "inference_platform": "unknown", - "id": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7189579205397235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5119887898349903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24773413897280966 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38200000000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3732546542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-SFT-preview/342ac912-805f-4166-b8f4-10f0503fa892.json b/leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-SFT-preview/342ac912-805f-4166-b8f4-10f0503fa892.json deleted file mode 100644 index 832cd58cda82acbcd1ee342ddea5c37885e0a536..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-SFT-preview/342ac912-805f-4166-b8f4-10f0503fa892.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AALF_FuseChat-Llama-3.1-8B-SFT-preview/1762652579.472149", - "retrieved_timestamp": "1762652579.47215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AALF/FuseChat-Llama-3.1-8B-SFT-preview", - "developer": "AALF", - "inference_platform": "unknown", - "id": "AALF/FuseChat-Llama-3.1-8B-SFT-preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7280504616639405 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5240303130445233 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22507552870090636 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40199999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37433510638297873 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AGI-0/AGI-0_Art-v0-3B/162b6d5f-f983-4989-9603-f6baea26b633.json b/leaderboard_data/HFOpenLLMv2/AGI-0/AGI-0_Art-v0-3B/162b6d5f-f983-4989-9603-f6baea26b633.json deleted file mode 100644 index 1b54cc9efcea78b4e962f9fd13ba9de61bcb55c6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AGI-0/AGI-0_Art-v0-3B/162b6d5f-f983-4989-9603-f6baea26b633.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AGI-0_Art-v0-3B/1762652579.473539", - "retrieved_timestamp": "1762652579.47354", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AGI-0/Art-v0-3B", - "developer": "AGI-0", - "inference_platform": "unknown", - "id": "AGI-0/Art-v0-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.319238509377341 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3400959483013824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24622356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11785239361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json b/leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json deleted file mode 100644 index cf802649493e5d01f917b41b3b91a0b739007589..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/AI-MO_NuminaMath-7B-CoT/1762652579.474318", - "retrieved_timestamp": "1762652579.4743192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AI-MO/NuminaMath-7B-CoT", - "developer": "AI-MO", - "inference_platform": "unknown", - "id": "AI-MO/NuminaMath-7B-CoT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2688544173903022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4314193495860012 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26963746223564955 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33034375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28681848404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.91 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-TIR/0ffa78d4-fe45-4639-bcd1-eb19ab168a35.json b/leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-TIR/0ffa78d4-fe45-4639-bcd1-eb19ab168a35.json deleted file mode 100644 index 8f2466fce1f00d8e6ea8aadb27c886cb4cff3998..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-TIR/0ffa78d4-fe45-4639-bcd1-eb19ab168a35.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AI-MO_NuminaMath-7B-TIR/1762652579.474566", - "retrieved_timestamp": "1762652579.474567", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AI-MO/NuminaMath-7B-TIR", - "developer": "AI-MO", - "inference_platform": "unknown", - "id": "AI-MO/NuminaMath-7B-TIR" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27562423259174545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41436913375897894 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1608761329305136 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35092708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2732712765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.91 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AI-Sweden-Models/AI-Sweden-Models_Llama-3-8B-instruct/1d68bd2e-de6e-4327-a8f1-33322eba537e.json b/leaderboard_data/HFOpenLLMv2/AI-Sweden-Models/AI-Sweden-Models_Llama-3-8B-instruct/1d68bd2e-de6e-4327-a8f1-33322eba537e.json deleted file mode 100644 index b8946e8a679d3b6c3a53ee6ee208d33ca44b5d9f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AI-Sweden-Models/AI-Sweden-Models_Llama-3-8B-instruct/1d68bd2e-de6e-4327-a8f1-33322eba537e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AI-Sweden-Models_Llama-3-8B-instruct/1762652579.474785", - "retrieved_timestamp": "1762652579.474786", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AI-Sweden-Models/Llama-3-8B-instruct", - "developer": "AI-Sweden-Models", - "inference_platform": "unknown", - "id": "AI-Sweden-Models/Llama-3-8B-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24012841482821137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4173460154515302 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47709375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25972406914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AI4free/AI4free_Dhanishtha/a554a3eb-943c-4135-966b-929129ef025d.json b/leaderboard_data/HFOpenLLMv2/AI4free/AI4free_Dhanishtha/a554a3eb-943c-4135-966b-929129ef025d.json deleted file mode 100644 index 833d8203f648056b75d94c7f4c964322825e378d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AI4free/AI4free_Dhanishtha/a554a3eb-943c-4135-966b-929129ef025d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AI4free_Dhanishtha/1762652579.475332", - "retrieved_timestamp": "1762652579.475332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AI4free/Dhanishtha", - "developer": "AI4free", - "inference_platform": "unknown", - "id": "AI4free/Dhanishtha" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2451240486353985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34039444943326375 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25604229607250756 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35694791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16431183510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AI4free/AI4free_t2/332ccdb5-faf5-47c6-afeb-a91d2148adf0.json b/leaderboard_data/HFOpenLLMv2/AI4free/AI4free_t2/332ccdb5-faf5-47c6-afeb-a91d2148adf0.json deleted file mode 100644 index 802924140cbd165c059621d12120ec7a04d9c9af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AI4free/AI4free_t2/332ccdb5-faf5-47c6-afeb-a91d2148adf0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AI4free_t2/1762652579.475577", - "retrieved_timestamp": "1762652579.475578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AI4free/t2", - "developer": "AI4free", - "inference_platform": "unknown", - "id": "AI4free/t2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3866828902866616 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2910111436321769 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18957703927492447 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3846354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11436170212765957 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AIDC-AI/AIDC-AI_Marco-o1/17f7398f-675d-4b38-b233-64fc106737c3.json 
b/leaderboard_data/HFOpenLLMv2/AIDC-AI/AIDC-AI_Marco-o1/17f7398f-675d-4b38-b233-64fc106737c3.json deleted file mode 100644 index 354dbfb93a0b9e5a30df5e169f609fec654ac096..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AIDC-AI/AIDC-AI_Marco-o1/17f7398f-675d-4b38-b233-64fc106737c3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AIDC-AI_Marco-o1/1762652579.47579", - "retrieved_timestamp": "1762652579.4757912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AIDC-AI/Marco-o1", - "developer": "AIDC-AI", - "inference_platform": "unknown", - "id": "AIDC-AI/Marco-o1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.477083028586373 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364362696398749 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37462235649546827 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41165226063829785 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Aashraf995/Aashraf995_Creative-7B-nerd/7ea9f4db-5b52-40a5-904e-785e43302934.json b/leaderboard_data/HFOpenLLMv2/Aashraf995/Aashraf995_Creative-7B-nerd/7ea9f4db-5b52-40a5-904e-785e43302934.json deleted file mode 100644 index 60c612ebe48d7ddd24f4cf7c53bfecfd42c7753b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Aashraf995/Aashraf995_Creative-7B-nerd/7ea9f4db-5b52-40a5-904e-785e43302934.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Aashraf995_Creative-7B-nerd/1762652579.476046", - "retrieved_timestamp": "1762652579.476046", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Aashraf995/Creative-7B-nerd", - "developer": "Aashraf995", - "inference_platform": "unknown", - "id": "Aashraf995/Creative-7B-nerd" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4721871301480073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5606785565640195 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3164652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4515416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44921875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AbacusResearch/AbacusResearch_Jallabi-34B/76397277-901a-4ad0-9dae-0351ca875ec6.json b/leaderboard_data/HFOpenLLMv2/AbacusResearch/AbacusResearch_Jallabi-34B/76397277-901a-4ad0-9dae-0351ca875ec6.json deleted file mode 100644 index c6cbac5b44ade247d6d8ec32750614dc468a4564..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AbacusResearch/AbacusResearch_Jallabi-34B/76397277-901a-4ad0-9dae-0351ca875ec6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AbacusResearch_Jallabi-34B/1762652579.477037", - "retrieved_timestamp": "1762652579.4770381", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AbacusResearch/Jallabi-34B", - "developer": "AbacusResearch", - "inference_platform": "unknown", - "id": "AbacusResearch/Jallabi-34B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3528604103777976 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6023380603196266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48217708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4681682180851064 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_StructuredThinker-v0.3-MoreStructure/81a5aafb-2cf7-490d-b619-ce638fcc8b38.json b/leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_StructuredThinker-v0.3-MoreStructure/81a5aafb-2cf7-490d-b619-ce638fcc8b38.json deleted file mode 100644 index a56c303730d937c9b2ce456ef250f14002e8ec08..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_StructuredThinker-v0.3-MoreStructure/81a5aafb-2cf7-490d-b619-ce638fcc8b38.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Ahdoot_StructuredThinker-v0.3-MoreStructure/1762652579.4772868", - "retrieved_timestamp": "1762652579.477288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Ahdoot/StructuredThinker-v0.3-MoreStructure", - "developer": "Ahdoot", - "inference_platform": "unknown", - "id": "Ahdoot/StructuredThinker-v0.3-MoreStructure" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4192808415005519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48376906494893984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.290785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41582291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36103723404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_Test_StealthThinker/43c907eb-3e43-47ff-b38d-f912ba6ef46c.json b/leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_Test_StealthThinker/43c907eb-3e43-47ff-b38d-f912ba6ef46c.json deleted file mode 100644 index df4f9bab1e87cda2d9c0ef615b8ef6449dcf04bf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_Test_StealthThinker/43c907eb-3e43-47ff-b38d-f912ba6ef46c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Ahdoot_Test_StealthThinker/1762652579.4775438", - "retrieved_timestamp": "1762652579.4775438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Ahdoot/Test_StealthThinker", - "developer": "Ahdoot", - "inference_platform": "unknown", - "id": "Ahdoot/Test_StealthThinker" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42200361706937595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46466398134666304 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17900302114803626 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42804166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35970744680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0-Coder/48732edf-8baf-438e-8a5c-763eee6c0c18.json b/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0-Coder/48732edf-8baf-438e-8a5c-763eee6c0c18.json deleted file mode 100644 index 3b8e9b3787b76cca88312b01ea21b83e50b633f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0-Coder/48732edf-8baf-438e-8a5c-763eee6c0c18.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V0-Coder/1762652579.478028", - "retrieved_timestamp": "1762652579.478029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder", - "developer": "AicoresSecurity", - "inference_platform": "unknown", - "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7097656440466851 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4477501104993749 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1487915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34079166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3178191489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0/38f169f0-e939-4b12-8f78-b2a27fb90de0.json b/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0/38f169f0-e939-4b12-8f78-b2a27fb90de0.json deleted file mode 100644 index 9eddb6a2cd89c9ebc27b35f9c2a93e7b317c3111..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0/38f169f0-e939-4b12-8f78-b2a27fb90de0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V0/1762652579.4777558", - "retrieved_timestamp": "1762652579.477757", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V0", - "developer": "AicoresSecurity", - "inference_platform": "unknown", - "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6358018945287394 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4497434194912941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33136458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.301030585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V1.1/e8c63728-a1f5-432f-bf9f-204b0f4041aa.json b/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V1.1/e8c63728-a1f5-432f-bf9f-204b0f4041aa.json deleted file mode 100644 index cfc591eef06683430fc989011a71370af4a92713..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V1.1/e8c63728-a1f5-432f-bf9f-204b0f4041aa.json +++ /dev/null @@ -1,107 
+0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V1.1/1762652579.478466", - "retrieved_timestamp": "1762652579.478467", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V1.1", - "developer": "AicoresSecurity", - "inference_platform": "unknown", - "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6730209178313542 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4391775517124728 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35409375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.308843085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V1/b613ecbe-7b2b-4b03-ab2c-163f9988a8fc.json b/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V1/b613ecbe-7b2b-4b03-ab2c-163f9988a8fc.json deleted file mode 100644 index 6369ee477ebbc7cceab4052618e261e07657230f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V1/b613ecbe-7b2b-4b03-ab2c-163f9988a8fc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V1/1762652579.478252", - "retrieved_timestamp": "1762652579.4782531", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - 
"model_info": { - "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V1", - "developer": "AicoresSecurity", - "inference_platform": "unknown", - "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6145693426774292 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282342020189216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32869791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2876496010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M0/1a4477f7-c414-41ab-bbcb-593f4a86031a.json b/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M0/1a4477f7-c414-41ab-bbcb-593f4a86031a.json deleted file mode 100644 index a96d316491db45c484045468d889889ba9a84ee1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M0/1a4477f7-c414-41ab-bbcb-593f4a86031a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Alepach_notHumpback-M0/1762652579.4786859", - "retrieved_timestamp": "1762652579.478687", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Alepach/notHumpback-M0", - "developer": "Alepach", - "inference_platform": "unknown", - "id": "Alepach/notHumpback-M0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23500755772461512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.27849287879199425 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35523958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1118683510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M1-v2/27c6c36d-6bd5-439b-bdc8-1bd0f8f4c9ea.json b/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M1-v2/27c6c36d-6bd5-439b-bdc8-1bd0f8f4c9ea.json deleted file mode 100644 index 02499a3dda39aed6e5cfbf103e0f1ed55b4a1a9b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M1-v2/27c6c36d-6bd5-439b-bdc8-1bd0f8f4c9ea.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Alepach_notHumpback-M1-v2/1762652579.4791439", - "retrieved_timestamp": "1762652579.479145", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Alepach/notHumpback-M1-v2", - "developer": "Alepach", - "inference_platform": "unknown", - "id": "Alepach/notHumpback-M1-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2277135777514772 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2775640398406834 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3473333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1118683510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M1/030f17b0-036f-4021-90da-6c1d38da659d.json b/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M1/030f17b0-036f-4021-90da-6c1d38da659d.json deleted file mode 100644 index 87a8aec8e8bc54bf85e2eda6ccedaac6b60fa714..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M1/030f17b0-036f-4021-90da-6c1d38da659d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Alepach_notHumpback-M1/1762652579.478936", - "retrieved_timestamp": "1762652579.4789371", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Alepach/notHumpback-M1", - "developer": "Alepach", - "inference_platform": "unknown", - "id": "Alepach/notHumpback-M1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2206944241279804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28824720129981835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23741610738255034 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.342 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10912566489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No 
newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Alibaba-NLP/Alibaba-NLP_gte-Qwen2-7B-instruct/39ea9329-5ed7-46ea-bcc4-30679a63b405.json b/leaderboard_data/HFOpenLLMv2/Alibaba-NLP/Alibaba-NLP_gte-Qwen2-7B-instruct/39ea9329-5ed7-46ea-bcc4-30679a63b405.json deleted file mode 100644 index a6ef71f97ffcf14f33c1b756d40f88d04fb23d9a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Alibaba-NLP/Alibaba-NLP_gte-Qwen2-7B-instruct/39ea9329-5ed7-46ea-bcc4-30679a63b405.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Alibaba-NLP_gte-Qwen2-7B-instruct/1762652579.479603", - "retrieved_timestamp": "1762652579.479604", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Alibaba-NLP/gte-Qwen2-7B-instruct", - "developer": "Alibaba-NLP", - "inference_platform": "unknown", - "id": "Alibaba-NLP/gte-Qwen2-7B-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22554045488193547 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4495144990818469 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35585416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33211436170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI2006/ef37c096-a089-4d3e-9fad-c0f959a18bb3.json b/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI2006/ef37c096-a089-4d3e-9fad-c0f959a18bb3.json deleted file mode 100644 index e702ed215d7dda47e1d00065760a93ae40f5ac55..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI2006/ef37c096-a089-4d3e-9fad-c0f959a18bb3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Amaorynho_BBAI2006/1762652579.480136", - "retrieved_timestamp": "1762652579.4801369", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Amaorynho/BBAI2006", - "developer": "Amaorynho", - "inference_platform": "unknown", - "id": "Amaorynho/BBAI2006" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14670518668244703 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2704366990167133 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3605416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.09 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI270V4/183313de-d526-42a9-a35d-a4e71466e546.json b/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI270V4/183313de-d526-42a9-a35d-a4e71466e546.json deleted file mode 100644 index 454e2250788b9f5d9d28ed9add8c13c41b522881..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI270V4/183313de-d526-42a9-a35d-a4e71466e546.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Amaorynho_BBAI270V4/1762652579.4803882", - "retrieved_timestamp": "1762652579.4803882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Amaorynho/BBAI270V4", - "developer": "Amaorynho", - "inference_platform": "unknown", - "id": "Amaorynho/BBAI270V4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1990374428737971 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30712046736502824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33139583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11136968085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAIIFEV1/7c0342a3-5bd4-47b0-b238-d5dcb0f6236e.json b/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAIIFEV1/7c0342a3-5bd4-47b0-b238-d5dcb0f6236e.json deleted file mode 100644 index 356439e4d5973180c3fd856b0daf211487842ea7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAIIFEV1/7c0342a3-5bd4-47b0-b238-d5dcb0f6236e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Amaorynho_BBAIIFEV1/1762652579.480599", - "retrieved_timestamp": "1762652579.4806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Amaorynho/BBAIIFEV1", - "developer": "Amaorynho", - "inference_platform": "unknown", - "id": "Amaorynho/BBAIIFEV1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8047369867507104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5292462038560509 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, 
- "max_score": 1 - }, - "score_details": { - "score": 0.1933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3857214095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI_375/ad4b6e40-883c-47c5-ba33-6c112c2c6b09.json b/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI_375/ad4b6e40-883c-47c5-ba33-6c112c2c6b09.json deleted file mode 100644 index 3e6868b2b1fd6efbcdd007369f03315e116b848f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI_375/ad4b6e40-883c-47c5-ba33-6c112c2c6b09.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Amaorynho_BBAI_375/1762652579.480799", - "retrieved_timestamp": "1762652579.480799", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Amaorynho/BBAI_375", - "developer": "Amaorynho", - "inference_platform": "unknown", - "id": "Amaorynho/BBAI_375" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14670518668244703 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2704366990167133 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3605416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.09 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Amu/Amu_t1-1.5B/3e967795-680c-4bfc-906b-eadb969cf2bd.json b/leaderboard_data/HFOpenLLMv2/Amu/Amu_t1-1.5B/3e967795-680c-4bfc-906b-eadb969cf2bd.json deleted file mode 100644 index 865361cee5ecf4b1777ae668896b0e18b791ae51..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Amu/Amu_t1-1.5B/3e967795-680c-4bfc-906b-eadb969cf2bd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Amu_t1-1.5B/1762652579.481014", - "retrieved_timestamp": "1762652579.481015", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Amu/t1-1.5B", - "developer": "Amu", - "inference_platform": "unknown", - "id": "Amu/t1-1.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3393717558300864 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4007606984109216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3517083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2566489361702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Amu/Amu_t1-3B/c0b7e3e6-4160-4482-af4f-038ae79c7578.json b/leaderboard_data/HFOpenLLMv2/Amu/Amu_t1-3B/c0b7e3e6-4160-4482-af4f-038ae79c7578.json deleted file mode 100644 index a43460feab59b76238df6ef034f8ad922b265c88..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/Amu/Amu_t1-3B/c0b7e3e6-4160-4482-af4f-038ae79c7578.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Amu_t1-3B/1762652579.481272", - "retrieved_timestamp": "1762652579.4812732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Amu/t1-3B", - "developer": "Amu", - "inference_platform": "unknown", - "id": "Amu/t1-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33277703160946287 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39989750143834385 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13746223564954682 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34348958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12840757978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ArliAI/ArliAI_ArliAI-RPMax-12B-v1.1/63fc1679-8504-41a0-98d5-2d23aad57b81.json b/leaderboard_data/HFOpenLLMv2/ArliAI/ArliAI_ArliAI-RPMax-12B-v1.1/63fc1679-8504-41a0-98d5-2d23aad57b81.json deleted file mode 100644 index b2a410e0de208aa5450384b0567b9b6657fd674c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ArliAI/ArliAI_ArliAI-RPMax-12B-v1.1/63fc1679-8504-41a0-98d5-2d23aad57b81.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ArliAI_ArliAI-RPMax-12B-v1.1/1762652579.481497", - "retrieved_timestamp": "1762652579.481498", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"ArliAI/ArliAI-RPMax-12B-v1.1", - "developer": "ArliAI", - "inference_platform": "unknown", - "id": "ArliAI/ArliAI-RPMax-12B-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5348852156721942 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.475181760840119 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36184375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3384308510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Arthur-LAGACHERIE/Arthur-LAGACHERIE_Precis-1B-Instruct/d93c70b5-cb3b-4647-aa47-15c2401f5ebf.json b/leaderboard_data/HFOpenLLMv2/Arthur-LAGACHERIE/Arthur-LAGACHERIE_Precis-1B-Instruct/d93c70b5-cb3b-4647-aa47-15c2401f5ebf.json deleted file mode 100644 index ab1b37a96d33c276f575587293704cfcc01db046..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Arthur-LAGACHERIE/Arthur-LAGACHERIE_Precis-1B-Instruct/d93c70b5-cb3b-4647-aa47-15c2401f5ebf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Arthur-LAGACHERIE_Precis-1B-Instruct/1762652579.482005", - "retrieved_timestamp": "1762652579.482006", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Arthur-LAGACHERIE/Precis-1B-Instruct", - "developer": "Arthur-LAGACHERIE", - "inference_platform": "unknown", - "id": "Arthur-LAGACHERIE/Precis-1B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3670738086056109 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3223614510687368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34355208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14261968085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Artples/Artples_L-MChat-7b/7aeaf034-1c02-4da7-b7b4-9a27ce759601.json b/leaderboard_data/HFOpenLLMv2/Artples/Artples_L-MChat-7b/7aeaf034-1c02-4da7-b7b4-9a27ce759601.json deleted file mode 100644 index 9a6458ce2e10782e6e79b9984ce67c97c52ca3cb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Artples/Artples_L-MChat-7b/7aeaf034-1c02-4da7-b7b4-9a27ce759601.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Artples_L-MChat-7b/1762652579.482251", - "retrieved_timestamp": "1762652579.482251", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Artples/L-MChat-7b", - "developer": "Artples", - "inference_platform": "unknown", - "id": "Artples/L-MChat-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5296646231997766 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46003301674679414 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4028645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3298703457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Artples/Artples_L-MChat-Small/0e5a84e3-b90f-4c20-ad58-4d1cf3517f28.json b/leaderboard_data/HFOpenLLMv2/Artples/Artples_L-MChat-Small/0e5a84e3-b90f-4c20-ad58-4d1cf3517f28.json deleted file mode 100644 index 0c48fa262087b1913c2aefba7dc832a918bcbfc9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Artples/Artples_L-MChat-Small/0e5a84e3-b90f-4c20-ad58-4d1cf3517f28.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Artples_L-MChat-Small/1762652579.4824991", - "retrieved_timestamp": "1762652579.4825", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Artples/L-MChat-Small", - "developer": "Artples", - "inference_platform": "unknown", - "id": "Artples/L-MChat-Small" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32870561222002065 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48225627665257265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36959375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24642619680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "PhiForCausalLM", - 
"params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Aryanne/Aryanne_SHBA/a1c56b87-d8d4-4570-9c33-b84dd066d92f.json b/leaderboard_data/HFOpenLLMv2/Aryanne/Aryanne_SHBA/a1c56b87-d8d4-4570-9c33-b84dd066d92f.json deleted file mode 100644 index cb58053079b9ee3e52750a60c395ed9c9e3fbe54..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Aryanne/Aryanne_SHBA/a1c56b87-d8d4-4570-9c33-b84dd066d92f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Aryanne_SHBA/1762652579.482961", - "retrieved_timestamp": "1762652579.482962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Aryanne/SHBA", - "developer": "Aryanne", - "inference_platform": "unknown", - "id": "Aryanne/SHBA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7816560060639104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5233174837035715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41613541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3892121010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Aryanne/Aryanne_SuperHeart/c6fae489-9bf8-40e5-a602-1c6ce9000537.json b/leaderboard_data/HFOpenLLMv2/Aryanne/Aryanne_SuperHeart/c6fae489-9bf8-40e5-a602-1c6ce9000537.json deleted file mode 100644 index 9d8c9888df4f63350ae4068437fcb627c155a997..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Aryanne/Aryanne_SuperHeart/c6fae489-9bf8-40e5-a602-1c6ce9000537.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Aryanne_SuperHeart/1762652579.483199", - "retrieved_timestamp": "1762652579.4832", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Aryanne/SuperHeart", - "developer": "Aryanne", - "inference_platform": "unknown", - "id": "Aryanne/SuperHeart" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5192234382549413 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5215375046264326 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44357291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3912067819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Glowing-Forest-12B/13716fd0-049a-4e9a-90ca-af9db59c1703.json b/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Glowing-Forest-12B/13716fd0-049a-4e9a-90ca-af9db59c1703.json deleted file mode 100644 index 600307e405832bb27a6344253b3a43f282fd86c4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Glowing-Forest-12B/13716fd0-049a-4e9a-90ca-af9db59c1703.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Ateron_Glowing-Forest-12B/1762652579.484101", - "retrieved_timestamp": "1762652579.4841018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Ateron/Glowing-Forest-12B", - "developer": "Ateron", - "inference_platform": "unknown", - "id": "Ateron/Glowing-Forest-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3591803082487799 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.549176294722067 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44490625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37175864361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Lotus-Magpic/bedab846-a6b2-4c51-9690-27deb7a76fe7.json b/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Lotus-Magpic/bedab846-a6b2-4c51-9690-27deb7a76fe7.json deleted file mode 100644 index 3c4642a4e4c9f145e6319ed88f0fcb7f3aa4b068..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Lotus-Magpic/bedab846-a6b2-4c51-9690-27deb7a76fe7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Ateron_Lotus-Magpic/1762652579.484373", - "retrieved_timestamp": "1762652579.484374", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Ateron/Lotus-Magpic", - "developer": "Ateron", - "inference_platform": "unknown", - "id": "Ateron/Lotus-Magpic" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6286076499244228 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5253514950133299 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4331875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3490691489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Way_of_MagPicaro/0a5e585d-1a90-4849-9df5-670a56b9f161.json b/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Way_of_MagPicaro/0a5e585d-1a90-4849-9df5-670a56b9f161.json deleted file mode 100644 index 3d5e7b239b629f317859a85da386a3d2a61394bf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Way_of_MagPicaro/0a5e585d-1a90-4849-9df5-670a56b9f161.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Ateron_Way_of_MagPicaro/1762652579.484595", - "retrieved_timestamp": "1762652579.484596", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Ateron/Way_of_MagPicaro", - "developer": "Ateron", - "inference_platform": "unknown", - "id": "Ateron/Way_of_MagPicaro" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2637091805298829 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5427386861946704 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46490625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35355718085106386 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-4B/5fe88e89-1055-4357-9394-004dd4635e58.json b/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-4B/5fe88e89-1055-4357-9394-004dd4635e58.json deleted file mode 100644 index 7ad40e181233bdd6f060ea20ced6b98c7993f0cb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-4B/5fe88e89-1055-4357-9394-004dd4635e58.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-4B/1762652579.484812", - "retrieved_timestamp": "1762652579.484813", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AuraIndustries/Aura-4B", - "developer": "AuraIndustries", - "inference_platform": "unknown", - "id": "AuraIndustries/Aura-4B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38156203318306536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4490409465001946 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27061170212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.513 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-8B/39e029ad-b385-4b26-9a02-b40c90cd8ad8.json b/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-8B/39e029ad-b385-4b26-9a02-b40c90cd8ad8.json deleted file mode 100644 index c67454836729d8da7b820fdbd6a55c5b38af90b9..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-8B/39e029ad-b385-4b26-9a02-b40c90cd8ad8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-8B/1762652579.485057", - "retrieved_timestamp": "1762652579.485057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AuraIndustries/Aura-8B", - "developer": "AuraIndustries", - "inference_platform": "unknown", - "id": "AuraIndustries/Aura-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7205315230255722 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5131231419849063 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4004479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38738364361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-MoE-2x4B-v2/3402882b-af4e-4509-9d57-32efa5d8c495.json b/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-MoE-2x4B-v2/3402882b-af4e-4509-9d57-32efa5d8c495.json deleted file mode 100644 index 5693b31935b1c0c6bc276120d3b4fdbeeb97ebe9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-MoE-2x4B-v2/3402882b-af4e-4509-9d57-32efa5d8c495.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-MoE-2x4B-v2/1762652579.4855082", - "retrieved_timestamp": "1762652579.4855092", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AuraIndustries/Aura-MoE-2x4B-v2", - "developer": "AuraIndustries", - "inference_platform": "unknown", - "id": "AuraIndustries/Aura-MoE-2x4B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4777822843388875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43152444292813597 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4100625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609707446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 7.231 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-MoE-2x4B/8239ffac-3fca-4eab-86d4-78bab22dc420.json b/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-MoE-2x4B/8239ffac-3fca-4eab-86d4-78bab22dc420.json deleted file mode 100644 index bb5cb66aa9d2fd2674e8ee6cd853cd4616c52402..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-MoE-2x4B/8239ffac-3fca-4eab-86d4-78bab22dc420.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-MoE-2x4B/1762652579.48526", - "retrieved_timestamp": "1762652579.485261", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AuraIndustries/Aura-MoE-2x4B", - "developer": "AuraIndustries", - "inference_platform": "unknown", - "id": "AuraIndustries/Aura-MoE-2x4B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.460096987105325 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43385067041774666 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40851041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26496010638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 7.231 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Aurel9/Aurel9_testmerge-7b/eb45737a-74bc-482d-9d7f-d2bd1d876c77.json b/leaderboard_data/HFOpenLLMv2/Aurel9/Aurel9_testmerge-7b/eb45737a-74bc-482d-9d7f-d2bd1d876c77.json deleted file mode 100644 index dbaa6ce3eb875aec16f10405decad7ee230b8e34..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Aurel9/Aurel9_testmerge-7b/eb45737a-74bc-482d-9d7f-d2bd1d876c77.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Aurel9_testmerge-7b/1762652579.485724", - "retrieved_timestamp": "1762652579.485725", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Aurel9/testmerge-7b", - "developer": "Aurel9", - "inference_platform": "unknown", - "id": "Aurel9/testmerge-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3979984219648311 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5189590919105128 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4658645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3052692819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Ayush-Singh/Ayush-Singh_Llama1B-sft-2/678cad7f-854b-4dc3-91cc-2d1774ef7faf.json b/leaderboard_data/HFOpenLLMv2/Ayush-Singh/Ayush-Singh_Llama1B-sft-2/678cad7f-854b-4dc3-91cc-2d1774ef7faf.json deleted file mode 100644 index ec3fca4c46139cbfc90e37959a3aae3b639329e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Ayush-Singh/Ayush-Singh_Llama1B-sft-2/678cad7f-854b-4dc3-91cc-2d1774ef7faf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Ayush-Singh_Llama1B-sft-2/1762652579.4859679", - "retrieved_timestamp": "1762652579.4859688", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Ayush-Singh/Llama1B-sft-2", - "developer": "Ayush-Singh", - "inference_platform": "unknown", - "id": "Ayush-Singh/Llama1B-sft-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13743755457741016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.283428204214368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35520833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11170212765957446 - } - } - ], - 
"additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_Blossom-V6-14B/24ce59a5-c351-4ed8-8944-8ec5db739da8.json b/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_Blossom-V6-14B/24ce59a5-c351-4ed8-8944-8ec5db739da8.json deleted file mode 100644 index f3857256defd9a8a0d99ddb54739d2ac88720e03..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_Blossom-V6-14B/24ce59a5-c351-4ed8-8944-8ec5db739da8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Azure99_Blossom-V6-14B/1762652579.486225", - "retrieved_timestamp": "1762652579.4862258", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Azure99/Blossom-V6-14B", - "developer": "Azure99", - "inference_platform": "unknown", - "id": "Azure99/Blossom-V6-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6395486198841297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5068726694646123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.525679758308157 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40352083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4543716755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_Blossom-V6-7B/35949fb3-8c01-45cf-b4db-bbe983b15ac6.json b/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_Blossom-V6-7B/35949fb3-8c01-45cf-b4db-bbe983b15ac6.json deleted file mode 100644 index 5a6876c4a0168b7c1bf446b1fbd20add3ce16b30..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_Blossom-V6-7B/35949fb3-8c01-45cf-b4db-bbe983b15ac6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/Azure99_Blossom-V6-7B/1762652579.486468", - "retrieved_timestamp": "1762652579.486469", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Azure99/Blossom-V6-7B", - "developer": "Azure99", - "inference_platform": "unknown", - "id": "Azure99/Blossom-V6-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5538194213575536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49736683240887 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45845921450151056 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43009375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41439494680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5-32b/6adfe39d-f2c2-4101-8f0f-7496d55397cd.json b/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5-32b/6adfe39d-f2c2-4101-8f0f-7496d55397cd.json deleted file mode 100644 index 671fce54d50857cc9ea7348b97c8c10ee9c44413..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5-32b/6adfe39d-f2c2-4101-8f0f-7496d55397cd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5-32b/1762652579.4866729", - "retrieved_timestamp": "1762652579.4866738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Azure99/blossom-v5-32b", - "developer": "Azure99", - "inference_platform": "unknown", - "id": "Azure99/blossom-v5-32b" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5235441960664371 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5954545257004673 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1865558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40199999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4234541223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.512 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5.1-34b/d2342413-1b55-4da5-a6e5-da6274f309ad.json b/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5.1-34b/d2342413-1b55-4da5-a6e5-da6274f309ad.json deleted file mode 100644 index 53abe01ec5c746a5138bdf9f725be8e8e0f8a410..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5.1-34b/d2342413-1b55-4da5-a6e5-da6274f309ad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5.1-34b/1762652579.4871309", - "retrieved_timestamp": "1762652579.4871309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Azure99/blossom-v5.1-34b", - "developer": "Azure99", - "inference_platform": "unknown", - "id": "Azure99/blossom-v5.1-34b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5696562897556262 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6109110096611161 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2590634441087613 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4557845744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5.1-9b/8eb55323-b0d7-4419-aec6-03de8bcd472e.json b/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5.1-9b/8eb55323-b0d7-4419-aec6-03de8bcd472e.json deleted file mode 100644 index f61dcc05f1667418d7b0e1a14360aaaa60647f62..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5.1-9b/8eb55323-b0d7-4419-aec6-03de8bcd472e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5.1-9b/1762652579.487347", - "retrieved_timestamp": "1762652579.487348", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Azure99/blossom-v5.1-9b", - "developer": "Azure99", - "inference_platform": "unknown", - "id": "Azure99/blossom-v5.1-9b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5085816744016985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5343292377916368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2122356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.39939583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39793882978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0613-Llama3-70B/69cea95c-c167-42f4-a233-f7739f86f6a7.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0613-Llama3-70B/69cea95c-c167-42f4-a233-f7739f86f6a7.json deleted file mode 100644 index d8fe59d837420d312bd3941130ae14240d008df1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0613-Llama3-70B/69cea95c-c167-42f4-a233-f7739f86f6a7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0613-Llama3-70B/1762652579.487831", - "retrieved_timestamp": "1762652579.487832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-3M-0613-Llama3-70B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-3M-0613-Llama3-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6821134589555713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6641614484348598 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45226041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47298869680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0613-Mistral-7B/9d9ac91a-f339-41a4-ae91-3dba41b06382.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0613-Mistral-7B/9d9ac91a-f339-41a4-ae91-3dba41b06382.json deleted file mode 100644 index 426cb5598ee391224c6ee54c236010c153e8c78f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0613-Mistral-7B/9d9ac91a-f339-41a4-ae91-3dba41b06382.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0613-Mistral-7B/1762652579.48831", - "retrieved_timestamp": "1762652579.4883142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-3M-0613-Mistral-7B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-3M-0613-Mistral-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5319873491225504 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49582333763258896 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4350833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31607380319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Llama3-70B/73eb53bc-a090-4415-8fdc-a767a2e00188.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Llama3-70B/73eb53bc-a090-4415-8fdc-a767a2e00188.json deleted file mode 100644 index b05cb69e6ff6351a511555b421c8f73ed0c27336..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Llama3-70B/73eb53bc-a090-4415-8fdc-a767a2e00188.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Llama3-70B/1762652579.4887528", - "retrieved_timestamp": "1762652579.488755", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-3M-0625-Llama3-70B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-3M-0625-Llama3-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7442120240960651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6670337872930245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22507552870090636 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46165625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4586103723404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Llama3-8B/00d87824-732a-4746-8d9f-ce7b1f45c0ae.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Llama3-8B/00d87824-732a-4746-8d9f-ce7b1f45c0ae.json deleted file mode 100644 index 4afbfb41565c82c49c3cd4b15cda08567ddc5e51..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Llama3-8B/00d87824-732a-4746-8d9f-ce7b1f45c0ae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Llama3-8B/1762652579.4890082", - "retrieved_timestamp": "1762652579.489009", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"BAAI/Infinity-Instruct-3M-0625-Llama3-8B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-3M-0625-Llama3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6050268842227512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4954985723563075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08836858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37120833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3252160904255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Mistral-7B/be3423f2-98f0-414a-b0c3-efd0d60d4cb3.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Mistral-7B/be3423f2-98f0-414a-b0c3-efd0d60d4cb3.json deleted file mode 100644 index 3d04b7f6606d686deb04773cf7799aa2630292f0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Mistral-7B/be3423f2-98f0-414a-b0c3-efd0d60d4cb3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Mistral-7B/1762652579.489246", - "retrieved_timestamp": "1762652579.489247", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-3M-0625-Mistral-7B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-3M-0625-Mistral-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5867420666054957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4939670574681802 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42723958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3229720744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Qwen2-7B/2390d668-3273-4f58-a0fd-b13b9d9b1651.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Qwen2-7B/2390d668-3273-4f58-a0fd-b13b9d9b1651.json deleted file mode 100644 index 367ecd17700b8d8929ef8b1df37faa0709b6711b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Qwen2-7B/2390d668-3273-4f58-a0fd-b13b9d9b1651.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Qwen2-7B/1762652579.489471", - "retrieved_timestamp": "1762652579.489472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-3M-0625-Qwen2-7B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-3M-0625-Qwen2-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5553930238434022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5345911997776569 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38876041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39602726063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Yi-1.5-9B/8a2d5e9c-7d41-4638-8b8c-58d08fc0912b.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Yi-1.5-9B/8a2d5e9c-7d41-4638-8b8c-58d08fc0912b.json deleted file mode 100644 index a42525cdae5d295def4835fbfd3a14aa496edce3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Yi-1.5-9B/8a2d5e9c-7d41-4638-8b8c-58d08fc0912b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Yi-1.5-9B/1762652579.489686", - "retrieved_timestamp": "1762652579.489687", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5185984299436606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5509115146247398 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41181848404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-0729-Llama3_1-8B/eace7f56-b853-436d-a744-bfdb9e227993.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-0729-Llama3_1-8B/eace7f56-b853-436d-a744-bfdb9e227993.json deleted file mode 100644 index 6ec710e814b5a73faf217fe9f3600cbfd0d4d75a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-0729-Llama3_1-8B/eace7f56-b853-436d-a744-bfdb9e227993.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-0729-Llama3_1-8B/1762652579.489912", - "retrieved_timestamp": "1762652579.489913", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6131952109292234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5077335431381055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35784375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3223902925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-0729-mistral-7B/25477dff-04c5-4cb8-9ad9-3a13448a2a7d.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-0729-mistral-7B/25477dff-04c5-4cb8-9ad9-3a13448a2a7d.json deleted file mode 100644 index 
1ee53694737ca916d7a56b69979169579d3ef4fd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-0729-mistral-7B/25477dff-04c5-4cb8-9ad9-3a13448a2a7d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-0729-mistral-7B/1762652579.490131", - "retrieved_timestamp": "1762652579.490131", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-7M-0729-mistral-7B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-7M-0729-mistral-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6161928128476886 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4963813586525743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4061875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3273769946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-70B/b04b4e4d-2f15-446b-974f-21f72fd80fe0.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-70B/b04b4e4d-2f15-446b-974f-21f72fd80fe0.json deleted file mode 100644 index e41be63563b1637e760bd6bb8da88fa1eace3846..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-70B/b04b4e4d-2f15-446b-974f-21f72fd80fe0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-70B/1762652579.490346", - "retrieved_timestamp": "1762652579.490347", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7335458804859993 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6695200461367471 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25226586102719034 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45390625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.460688164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-8B/84f2027c-3e68-489e-902b-2fec6ec8f850.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-8B/84f2027c-3e68-489e-902b-2fec6ec8f850.json deleted file mode 100644 index 91365b830aa4b471b9334d7fe35522b4bc35696e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-8B/84f2027c-3e68-489e-902b-2fec6ec8f850.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-8B/1762652579.4905548", - "retrieved_timestamp": "1762652579.490556", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6131952109292234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5077335431381055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35784375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3223902925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-mistral-7B/51daf5e7-1d4e-4753-b24b-79273e6f9370.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-mistral-7B/51daf5e7-1d4e-4753-b24b-79273e6f9370.json deleted file mode 100644 index 6346de938342970c97452c09775d23b3b5d03f7b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-mistral-7B/51daf5e7-1d4e-4753-b24b-79273e6f9370.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-Gen-mistral-7B/1762652579.490771", - "retrieved_timestamp": "1762652579.490772", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BAAI/Infinity-Instruct-7M-Gen-mistral-7B", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/Infinity-Instruct-7M-Gen-mistral-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6146690780462506 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4963813586525743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4061875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3273769946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_OPI-Llama-3.1-8B-Instruct/567f27f3-3f64-4054-aa67-684c29e4d71a.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_OPI-Llama-3.1-8B-Instruct/567f27f3-3f64-4054-aa67-684c29e4d71a.json deleted file mode 100644 index d92e1d660df05931e6c610d2e3d476faaf5bf446..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_OPI-Llama-3.1-8B-Instruct/567f27f3-3f64-4054-aa67-684c29e4d71a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BAAI_OPI-Llama-3.1-8B-Instruct/1762652579.490996", - "retrieved_timestamp": "1762652579.490996", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BAAI/OPI-Llama-3.1-8B-Instruct", - "developer": "BAAI", - "inference_platform": "unknown", - "id": "BAAI/OPI-Llama-3.1-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20745510800232272 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3551224419497605 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3233020833333333 - } - }, - { 
- "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21243351063829788 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/112be4bf-bfac-470f-bde8-c1e4d7282667.json b/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/112be4bf-bfac-470f-bde8-c1e4d7282667.json deleted file mode 100644 index 71e72e2d278f215dc737929bf399b6ed4b45dcc9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/112be4bf-bfac-470f-bde8-c1e4d7282667.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/1762652579.492853", - "retrieved_timestamp": "1762652579.492853", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024", - "developer": "BEE-spoke-data", - "inference_platform": "unknown", - "id": "BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13206735905176042 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3137786304497592 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43927083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12367021276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "T5ForConditionalGeneration", - "params_billions": 0.887 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-flan/cdf0ce69-4697-4f16-a769-80691cc08b27.json b/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-flan/cdf0ce69-4697-4f16-a769-80691cc08b27.json deleted file mode 100644 index ad3cc2bd9d4af96fc7f135e02de5db00c1e8c7fe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-flan/cdf0ce69-4697-4f16-a769-80691cc08b27.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-e16-d32-flan/1762652579.492592", - "retrieved_timestamp": "1762652579.492592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BEE-spoke-data/tFINE-900m-e16-d32-flan", - "developer": "BEE-spoke-data", - "inference_platform": "unknown", - "id": "BEE-spoke-data/tFINE-900m-e16-d32-flan" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15057713533424646 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30280434847620613 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2332214765100671 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1307347074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 0.887 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-instruct_2e/7b1574ca-4106-42c0-9336-27df4f0851aa.json b/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-instruct_2e/7b1574ca-4106-42c0-9336-27df4f0851aa.json deleted file mode 100644 index 374b0997c07f72f30e076ed107642c524b2cc9df..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-instruct_2e/7b1574ca-4106-42c0-9336-27df4f0851aa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-e16-d32-instruct_2e/1762652579.493063", - "retrieved_timestamp": "1762652579.493064", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e", - "developer": "BEE-spoke-data", - "inference_platform": "unknown", - "id": "BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1402855534426433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31345674638809023 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42069791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12367021276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 0.887 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-instruct-orpo/e91b6749-3103-4cfa-bf16-86126ee2086e.json b/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-instruct-orpo/e91b6749-3103-4cfa-bf16-86126ee2086e.json deleted file mode 100644 index 48cdf2542590a3cf792084fa18485e5af78e3fdd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-instruct-orpo/e91b6749-3103-4cfa-bf16-86126ee2086e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-instruct-orpo/1762652579.493278", - "retrieved_timestamp": "1762652579.493279", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BEE-spoke-data/tFINE-900m-instruct-orpo", - "developer": "BEE-spoke-data", - "inference_platform": "unknown", - "id": "BEE-spoke-data/tFINE-900m-instruct-orpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13299157346950535 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30220933767045094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3408541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11519281914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 0.887 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BSC-LT/BSC-LT_salamandra-7b-instruct/2eb60f3a-53f4-478a-8292-aa5e210a8cdf.json b/leaderboard_data/HFOpenLLMv2/BSC-LT/BSC-LT_salamandra-7b-instruct/2eb60f3a-53f4-478a-8292-aa5e210a8cdf.json deleted file mode 100644 index ebf36d902c51602dd63692564f30a0232ea687e6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BSC-LT/BSC-LT_salamandra-7b-instruct/2eb60f3a-53f4-478a-8292-aa5e210a8cdf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BSC-LT_salamandra-7b-instruct/1762652579.493781", - "retrieved_timestamp": "1762652579.493781", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BSC-LT/salamandra-7b-instruct", - "developer": "BSC-LT", - "inference_platform": "unknown", - "id": "BSC-LT/salamandra-7b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24507418095098782 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3851324290080956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41343749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051861702127658 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.768 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BSC-LT/BSC-LT_salamandra-7b/36d2d3af-60aa-4624-b414-e249d06b6ee1.json b/leaderboard_data/HFOpenLLMv2/BSC-LT/BSC-LT_salamandra-7b/36d2d3af-60aa-4624-b414-e249d06b6ee1.json deleted file mode 100644 index 2877eafae85bdbf463f001300fe3696a480a76ac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BSC-LT/BSC-LT_salamandra-7b/36d2d3af-60aa-4624-b414-e249d06b6ee1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BSC-LT_salamandra-7b/1762652579.493503", - "retrieved_timestamp": "1762652579.493503", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BSC-LT/salamandra-7b", - "developer": "BSC-LT", - "inference_platform": "unknown", - "id": "BSC-LT/salamandra-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13673829882489574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3516612209885983 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35009375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14926861702127658 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.768 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Baptiste-HUVELLE-10/Baptiste-HUVELLE-10_LeTriomphant2.2_ECE_iLAB/b1632b15-fa00-4476-b3f4-05aba95df664.json b/leaderboard_data/HFOpenLLMv2/Baptiste-HUVELLE-10/Baptiste-HUVELLE-10_LeTriomphant2.2_ECE_iLAB/b1632b15-fa00-4476-b3f4-05aba95df664.json deleted file mode 100644 index a65778130a1a38105ac4d0f50d1ea6f57b8eea19..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Baptiste-HUVELLE-10/Baptiste-HUVELLE-10_LeTriomphant2.2_ECE_iLAB/b1632b15-fa00-4476-b3f4-05aba95df664.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Baptiste-HUVELLE-10_LeTriomphant2.2_ECE_iLAB/1762652579.4943", - "retrieved_timestamp": "1762652579.4943008", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB", - "developer": "Baptiste-HUVELLE-10", - "inference_platform": "unknown", - "id": "Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5076330802271307 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7256319952414622 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39932885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.46255208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5851063829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BenevolenceMessiah/BenevolenceMessiah_Qwen2.5-72B-2x-Instruct-TIES-v1.0/ad8e3029-612c-434e-a92b-f5c481476e25.json b/leaderboard_data/HFOpenLLMv2/BenevolenceMessiah/BenevolenceMessiah_Qwen2.5-72B-2x-Instruct-TIES-v1.0/ad8e3029-612c-434e-a92b-f5c481476e25.json deleted file mode 100644 index 369f9325e987d6b6bd8717e942cbbe66654cf1a0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BenevolenceMessiah/BenevolenceMessiah_Qwen2.5-72B-2x-Instruct-TIES-v1.0/ad8e3029-612c-434e-a92b-f5c481476e25.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BenevolenceMessiah_Qwen2.5-72B-2x-Instruct-TIES-v1.0/1762652579.4945831", - "retrieved_timestamp": "1762652579.494584", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0", - "developer": "BenevolenceMessiah", - "inference_platform": "unknown", - "id": "BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5473499204333391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.727311411382245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5785498489425982 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5628324468085106 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.7 - } -} \ No 
newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BenevolenceMessiah/BenevolenceMessiah_Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/129ba653-ec88-46f2-8828-77e320b922c6.json b/leaderboard_data/HFOpenLLMv2/BenevolenceMessiah/BenevolenceMessiah_Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/129ba653-ec88-46f2-8828-77e320b922c6.json deleted file mode 100644 index ae809373b904b32af0981ac435dee2eff3ebae23..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BenevolenceMessiah/BenevolenceMessiah_Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/129ba653-ec88-46f2-8828-77e320b922c6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BenevolenceMessiah_Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/1762652579.4948769", - "retrieved_timestamp": "1762652579.494878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0", - "developer": "BenevolenceMessiah", - "inference_platform": "unknown", - "id": "BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011531624977283 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4908666248538678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4079791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26803523936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 28.309 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BlackBeenie/BlackBeenie_Bloslain-8B-v0.2/160fb625-9c1c-40c1-ab93-7d9f7a2220d2.json b/leaderboard_data/HFOpenLLMv2/BlackBeenie/BlackBeenie_Bloslain-8B-v0.2/160fb625-9c1c-40c1-ab93-7d9f7a2220d2.json deleted file mode 100644 index a7bc1d1e9b54c970754b8ebb76cccc76e4781337..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/BlackBeenie/BlackBeenie_Bloslain-8B-v0.2/160fb625-9c1c-40c1-ab93-7d9f7a2220d2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BlackBeenie_Bloslain-8B-v0.2/1762652579.495104", - "retrieved_timestamp": "1762652579.495104", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BlackBeenie/Bloslain-8B-v0.2", - "developer": "BlackBeenie", - "inference_platform": "unknown", - "id": "BlackBeenie/Bloslain-8B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5023371321427147 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.511087946253543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14501510574018128 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4075729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3653590425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BlackBeenie/BlackBeenie_Llama-3.1-8B-OpenO1-SFT-v0.1/b298e0fc-f4fb-4464-beb8-45f8b5f35653.json b/leaderboard_data/HFOpenLLMv2/BlackBeenie/BlackBeenie_Llama-3.1-8B-OpenO1-SFT-v0.1/b298e0fc-f4fb-4464-beb8-45f8b5f35653.json deleted file mode 100644 index 785931f944772b0e25f480e73ad5dfbf7d983501..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BlackBeenie/BlackBeenie_Llama-3.1-8B-OpenO1-SFT-v0.1/b298e0fc-f4fb-4464-beb8-45f8b5f35653.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BlackBeenie_Llama-3.1-8B-OpenO1-SFT-v0.1/1762652579.495378", - "retrieved_timestamp": "1762652579.495378", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1", - "developer": "BlackBeenie", - "inference_platform": "unknown", - "id": "BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5124037553690873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4787448361604986 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15256797583081572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36181250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34915226063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_DreadMix/e6b5e728-28a4-444a-8b6b-89d29b7b5225.json b/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_DreadMix/e6b5e728-28a4-444a-8b6b-89d29b7b5225.json deleted file mode 100644 index 96989649a3ba341117481de6daa07ef603272bfa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_DreadMix/e6b5e728-28a4-444a-8b6b-89d29b7b5225.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BoltMonkey_DreadMix/1762652579.497959", - "retrieved_timestamp": "1762652579.497961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BoltMonkey/DreadMix", - "developer": "BoltMonkey", - "inference_platform": "unknown", - "id": "BoltMonkey/DreadMix" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7094908176970438 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5435097438362475 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555891238670695 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42121875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37898936170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/d9e3bd73-cd7e-46d4-9e62-0cfac178f62a.json b/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/d9e3bd73-cd7e-46d4-9e62-0cfac178f62a.json deleted file mode 100644 index fcdfc5295efec531f86ae0a118d547fa9e3e869d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/d9e3bd73-cd7e-46d4-9e62-0cfac178f62a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/1762652579.498452", - "retrieved_timestamp": "1762652579.498454", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", - "developer": "BoltMonkey", - "inference_platform": "unknown", - "id": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7998909559967553 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5151987922850448 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37333776595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/f83a5d67-b967-47c8-b76e-b58c445a3634.json b/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/f83a5d67-b967-47c8-b76e-b58c445a3634.json deleted file mode 100644 index 5f5c2c0a083544ab4a69340561fef068be0e1a60..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/f83a5d67-b967-47c8-b76e-b58c445a3634.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/1762652579.498964", - "retrieved_timestamp": "1762652579.498965", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", - "developer": "BoltMonkey", - "inference_platform": "unknown", - "id": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45902316963434797 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5185441912447182 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", 
- "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4082604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3631150265957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_SuperNeuralDreadDevil-8b/2ad0eebb-31e3-4f28-aba6-073f33d5cbed.json b/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_SuperNeuralDreadDevil-8b/2ad0eebb-31e3-4f28-aba6-073f33d5cbed.json deleted file mode 100644 index f3ce3a86e8987bffc0f59c2ea28a3a72c9009d6d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_SuperNeuralDreadDevil-8b/2ad0eebb-31e3-4f28-aba6-073f33d5cbed.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BoltMonkey_SuperNeuralDreadDevil-8b/1762652579.499188", - "retrieved_timestamp": "1762652579.499189", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BoltMonkey/SuperNeuralDreadDevil-8b", - "developer": "BoltMonkey", - "inference_platform": "unknown", - "id": "BoltMonkey/SuperNeuralDreadDevil-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7709898624538447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5286196012035721 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39768749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36785239361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_GEITje-7B-ultra/efcc28d3-ca6a-4100-afd2-75f9925354ba.json b/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_GEITje-7B-ultra/efcc28d3-ca6a-4100-afd2-75f9925354ba.json deleted file mode 100644 index 32afe6b2208ee3733358a8a35ba11104e6cb0afb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_GEITje-7B-ultra/efcc28d3-ca6a-4100-afd2-75f9925354ba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BramVanroy_GEITje-7B-ultra/1762652579.499682", - "retrieved_timestamp": "1762652579.4996831", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BramVanroy/GEITje-7B-ultra", - "developer": "BramVanroy", - "inference_platform": "unknown", - "id": "BramVanroy/GEITje-7B-ultra" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723442687624392 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37761612997305494 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32897916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20113031914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2-chat/faf20d1a-5a92-49b2-be69-903cafb9460a.json b/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2-chat/faf20d1a-5a92-49b2-be69-903cafb9460a.json deleted file mode 100644 index 755acdf50262b95831ab458e06186dc169bb1c05..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2-chat/faf20d1a-5a92-49b2-be69-903cafb9460a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/BramVanroy_fietje-2-chat/1762652579.500146", - "retrieved_timestamp": "1762652579.5001469", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BramVanroy/fietje-2-chat", - "developer": "BramVanroy", - "inference_platform": "unknown", - "id": "BramVanroy/fietje-2-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2917359273394593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4149753717401999 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20545212765957446 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "PhiForCausalLM", - "params_billions": 2.775 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2-instruct/03e122da-30cc-4c2e-9b44-8261c3f2a934.json b/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2-instruct/03e122da-30cc-4c2e-9b44-8261c3f2a934.json deleted file mode 100644 index e42d671b504719a3a0b26adcb82de584060d900e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2-instruct/03e122da-30cc-4c2e-9b44-8261c3f2a934.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BramVanroy_fietje-2-instruct/1762652579.500353", - "retrieved_timestamp": "1762652579.500354", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BramVanroy/fietje-2-instruct", - "developer": "BramVanroy", - "inference_platform": "unknown", - "id": 
"BramVanroy/fietje-2-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2789963962286732 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41360714173029806 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2332214765100671 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3369166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2103557180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "PhiForCausalLM", - "params_billions": 2.775 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2/3712e2c3-0ed1-4dc9-95fc-4be0bec18675.json b/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2/3712e2c3-0ed1-4dc9-95fc-4be0bec18675.json deleted file mode 100644 index 5fbf7cf1ae27018d9aaf2c533b858089e69c2179..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2/3712e2c3-0ed1-4dc9-95fc-4be0bec18675.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BramVanroy_fietje-2/1762652579.499938", - "retrieved_timestamp": "1762652579.499939", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BramVanroy/fietje-2", - "developer": "BramVanroy", - "inference_platform": "unknown", - "id": "BramVanroy/fietje-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20980332185268422 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40356695178386187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3695625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19855385638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_Llama-PLLuM-8B-chat/cb833a8b-81d7-41a6-bff2-9d0927703113.json b/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_Llama-PLLuM-8B-chat/cb833a8b-81d7-41a6-bff2-9d0927703113.json deleted file mode 100644 index eac0c8b8c30ec742a139c1914c0d59f51f325f98..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_Llama-PLLuM-8B-chat/cb833a8b-81d7-41a6-bff2-9d0927703113.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_Llama-PLLuM-8B-chat/1762652579.5008068", - "retrieved_timestamp": "1762652579.500808", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CYFRAGOVPL/Llama-PLLuM-8B-chat", - "developer": "CYFRAGOVPL", - "inference_platform": "unknown", - "id": "CYFRAGOVPL/Llama-PLLuM-8B-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3514862786295917 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40770722535589576 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41991666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27194148936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-base/76833817-781e-4292-9fe8-5e8a1da7f962.json b/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-base/76833817-781e-4292-9fe8-5e8a1da7f962.json deleted file mode 100644 index 57b80877cd62e210fa2b695af1aebc39f2350ad1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-base/76833817-781e-4292-9fe8-5e8a1da7f962.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-base/1762652579.501051", - "retrieved_timestamp": "1762652579.501052", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CYFRAGOVPL/PLLuM-12B-base", - "developer": "CYFRAGOVPL", - "inference_platform": "unknown", - "id": "CYFRAGOVPL/PLLuM-12B-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2820937335159599 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4390596143784447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4142395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2740192819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-chat/6e325f0f-b5db-4773-8179-7e949bd3f5f2.json b/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-chat/6e325f0f-b5db-4773-8179-7e949bd3f5f2.json deleted file mode 100644 index f5de95952df14ad4940ab82458351efcb9e3577a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-chat/6e325f0f-b5db-4773-8179-7e949bd3f5f2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-chat/1762652579.501271", - "retrieved_timestamp": "1762652579.501272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CYFRAGOVPL/PLLuM-12B-chat", - "developer": "CYFRAGOVPL", - "inference_platform": "unknown", - "id": "CYFRAGOVPL/PLLuM-12B-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32143601200370575 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44458000333075703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4114791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2872340425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-nc-base/e9b90a3b-09c6-4d3b-9aa3-6279ea3cccb5.json b/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-nc-base/e9b90a3b-09c6-4d3b-9aa3-6279ea3cccb5.json deleted file mode 100644 index 9590ea3f8b41c178e0728fbff9b2eff6c130fbc1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-nc-base/e9b90a3b-09c6-4d3b-9aa3-6279ea3cccb5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-nc-base/1762652579.501493", - 
"retrieved_timestamp": "1762652579.501494", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CYFRAGOVPL/PLLuM-12B-nc-base", - "developer": "CYFRAGOVPL", - "inference_platform": "unknown", - "id": "CYFRAGOVPL/PLLuM-12B-nc-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24045310886226323 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42767589675970014 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36451041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25590093085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-nc-chat/fd19dada-5945-45d5-8a84-122404b8dd57.json b/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-nc-chat/fd19dada-5945-45d5-8a84-122404b8dd57.json deleted file mode 100644 index 5b55bc1d9d414bed5464c75c042f392d4265f807..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-nc-chat/fd19dada-5945-45d5-8a84-122404b8dd57.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-nc-chat/1762652579.501705", - "retrieved_timestamp": "1762652579.501706", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CYFRAGOVPL/PLLuM-12B-nc-chat", - "developer": "CYFRAGOVPL", - "inference_platform": "unknown", - "id": "CYFRAGOVPL/PLLuM-12B-nc-chat" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28344237733657807 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45764328318815456 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4353541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25972406914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CarrotAI/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct-2412/41809335-e00c-4911-bc08-6edd71891585.json b/leaderboard_data/HFOpenLLMv2/CarrotAI/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct-2412/41809335-e00c-4911-bc08-6edd71891585.json deleted file mode 100644 index 223ac119e89ddcfbd063ef5ab6bafedd9542c87f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CarrotAI/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct-2412/41809335-e00c-4911-bc08-6edd71891585.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct-2412/1762652579.5021691", - "retrieved_timestamp": "1762652579.50217", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412", - "developer": "CarrotAI", - "inference_platform": "unknown", - "id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47818233398493776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.43577246498246686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31341422872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CarrotAI/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct/8c56b973-d5cb-48b6-a43e-ad50769b1f40.json b/leaderboard_data/HFOpenLLMv2/CarrotAI/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct/8c56b973-d5cb-48b6-a43e-ad50769b1f40.json deleted file mode 100644 index 08745a2a060c3f5c46a2f4ef559af65072b96595..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CarrotAI/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct/8c56b973-d5cb-48b6-a43e-ad50769b1f40.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct/1762652579.501917", - "retrieved_timestamp": "1762652579.5019178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct", - "developer": "CarrotAI", - "inference_platform": "unknown", - "id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7198821349574684 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4426719080820793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2822473404255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Casual-Autopsy/Casual-Autopsy_L3-Umbral-Mind-RP-v2.0-8B/da5c1edf-bd74-48a3-ad76-a4bd89539b7f.json b/leaderboard_data/HFOpenLLMv2/Casual-Autopsy/Casual-Autopsy_L3-Umbral-Mind-RP-v2.0-8B/da5c1edf-bd74-48a3-ad76-a4bd89539b7f.json deleted file mode 100644 index c65d28fddba9b5ccb86bcce61529587494c5384f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Casual-Autopsy/Casual-Autopsy_L3-Umbral-Mind-RP-v2.0-8B/da5c1edf-bd74-48a3-ad76-a4bd89539b7f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Casual-Autopsy_L3-Umbral-Mind-RP-v2.0-8B/1762652579.502389", - "retrieved_timestamp": "1762652579.502389", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B", - "developer": "Casual-Autopsy", - "inference_platform": "unknown", - "id": "Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7122634609502786 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5262406145493724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1095166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.3723404255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_14B/c4376867-854d-44fa-9215-b9c1af7612a4.json b/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_14B/c4376867-854d-44fa-9215-b9c1af7612a4.json deleted file mode 100644 index 9edbf818f82b3f2e07323d4921b60c732ff67b38..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_14B/c4376867-854d-44fa-9215-b9c1af7612a4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CausalLM_14B/1762652579.502646", - "retrieved_timestamp": "1762652579.502647", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CausalLM/14B", - "developer": "CausalLM", - "inference_platform": "unknown", - "id": "CausalLM/14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2788213052478535 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4700462397700626 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4154791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221409574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_34b-beta/cc482ca4-031a-4c22-90c2-68322184125b.json b/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_34b-beta/cc482ca4-031a-4c22-90c2-68322184125b.json deleted file mode 100644 index d67cbc79017b1f0d1c1c77dfccd76774d7b485e1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_34b-beta/cc482ca4-031a-4c22-90c2-68322184125b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/CausalLM_34b-beta/1762652579.502916", - "retrieved_timestamp": "1762652579.502916", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CausalLM/34b-beta", - "developer": "CausalLM", - "inference_platform": "unknown", - "id": "CausalLM/34b-beta" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3043247472262486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5590996102136266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37486458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5324966755319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_preview-1-hf/e9fcf09c-14e2-4226-b1e5-b5752ac1a753.json b/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_preview-1-hf/e9fcf09c-14e2-4226-b1e5-b5752ac1a753.json deleted file mode 100644 index 744daa3ad804cc501505d84c2f08fc90dbd8fedb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_preview-1-hf/e9fcf09c-14e2-4226-b1e5-b5752ac1a753.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CausalLM_preview-1-hf/1762652579.503128", - "retrieved_timestamp": "1762652579.503129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CausalLM/preview-1-hf", - "developer": "CausalLM", - "inference_platform": "unknown", - "id": "CausalLM/preview-1-hf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5558928088582737 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3614567463880903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34218750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35970744680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GlmForCausalLM", - "params_billions": 9.543 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Changgil/Changgil_K2S3-14b-v0.2/4dfe2d3c-7fc3-4b57-8acd-02b0808ccdb1.json b/leaderboard_data/HFOpenLLMv2/Changgil/Changgil_K2S3-14b-v0.2/4dfe2d3c-7fc3-4b57-8acd-02b0808ccdb1.json deleted file mode 100644 index 01ffe028972970c2f7769f87927e75c04046b123..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Changgil/Changgil_K2S3-14b-v0.2/4dfe2d3c-7fc3-4b57-8acd-02b0808ccdb1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Changgil_K2S3-14b-v0.2/1762652579.503338", - "retrieved_timestamp": "1762652579.503339", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Changgil/K2S3-14b-v0.2", - "developer": "Changgil", - "inference_platform": "unknown", - "id": "Changgil/K2S3-14b-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3242840108689389 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4613311786298187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3922604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2643783244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 14.352 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Changgil/Changgil_K2S3-v0.1/225bc36b-4bfb-4818-8601-903e7f9decb3.json b/leaderboard_data/HFOpenLLMv2/Changgil/Changgil_K2S3-v0.1/225bc36b-4bfb-4818-8601-903e7f9decb3.json deleted file mode 100644 index 885ce58b95a10a560ede35872ae77f69d0fe3f20..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Changgil/Changgil_K2S3-v0.1/225bc36b-4bfb-4818-8601-903e7f9decb3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Changgil_K2S3-v0.1/1762652579.503593", - "retrieved_timestamp": "1762652579.503594", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Changgil/K2S3-v0.1", - "developer": "Changgil", - "inference_platform": "unknown", - "id": "Changgil/K2S3-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32765617450586665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46554920672286154 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40140624999999996 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2562333776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 14.352 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Albacus/0be5437b-2489-4107-8c38-d0cd198a2d8c.json b/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Albacus/0be5437b-2489-4107-8c38-d0cd198a2d8c.json deleted file mode 100644 index 77561cff43410d0838e495954d0564921cb1af79..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Albacus/0be5437b-2489-4107-8c38-d0cd198a2d8c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ClaudioItaly_Albacus/1762652579.503804", - "retrieved_timestamp": "1762652579.503805", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ClaudioItaly/Albacus", - "developer": "ClaudioItaly", - "inference_platform": "unknown", - "id": "ClaudioItaly/Albacus" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4667415790103592 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5113043406568835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41353124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31648936170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.987 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Book-Gut12B/b2bdf337-9065-4a67-aa1a-5ba8751d5438.json 
b/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Book-Gut12B/b2bdf337-9065-4a67-aa1a-5ba8751d5438.json deleted file mode 100644 index 2c1e3253265764a52bc3b6d025a12cbc50582190..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Book-Gut12B/b2bdf337-9065-4a67-aa1a-5ba8751d5438.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ClaudioItaly_Book-Gut12B/1762652579.504094", - "retrieved_timestamp": "1762652579.504095", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ClaudioItaly/Book-Gut12B", - "developer": "ClaudioItaly", - "inference_platform": "unknown", - "id": "ClaudioItaly/Book-Gut12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39984685080032095 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5417370194443233 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4635416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3670212765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Evolutionstory-7B-v2.2/e06c19ce-9247-473b-b5db-8686fee5e785.json b/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Evolutionstory-7B-v2.2/e06c19ce-9247-473b-b5db-8686fee5e785.json deleted file mode 100644 index 333f59d96d735bafd3fb0ed7ecfdb618a37d08e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Evolutionstory-7B-v2.2/e06c19ce-9247-473b-b5db-8686fee5e785.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ClaudioItaly_Evolutionstory-7B-v2.2/1762652579.504309", - "retrieved_timestamp": "1762652579.504309", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ClaudioItaly/Evolutionstory-7B-v2.2", - "developer": "ClaudioItaly", - "inference_platform": "unknown", - "id": "ClaudioItaly/Evolutionstory-7B-v2.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4813794066410457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5108043406568835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41353124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31590757978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_intelligence-cod-rag-7b-v3/51559a6d-1262-41e2-8092-008dc8f53974.json b/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_intelligence-cod-rag-7b-v3/51559a6d-1262-41e2-8092-008dc8f53974.json deleted file mode 100644 index 3e0fdf20ac4c6035de5afe3aa742658115d6b1ca..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_intelligence-cod-rag-7b-v3/51559a6d-1262-41e2-8092-008dc8f53974.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ClaudioItaly_intelligence-cod-rag-7b-v3/1762652579.504531", - "retrieved_timestamp": "1762652579.504531", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ClaudioItaly/intelligence-cod-rag-7b-v3", - "developer": "ClaudioItaly", - "inference_platform": "unknown", - "id": "ClaudioItaly/intelligence-cod-rag-7b-v3" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6897820006471718 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366339718839108 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806646525679758 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4152708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4195478723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-23-35B/9c77aa3f-080c-4dd6-8a9d-50d18657de35.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-23-35B/9c77aa3f-080c-4dd6-8a9d-50d18657de35.json deleted file mode 100644 index 31739a37ec4a98173a0a053588a5a6414f3560ec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-23-35B/9c77aa3f-080c-4dd6-8a9d-50d18657de35.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CohereForAI_aya-23-35B/1762652579.5047522", - "retrieved_timestamp": "1762652579.5047529", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CohereForAI/aya-23-35B", - "developer": "CohereForAI", - "inference_platform": "unknown", - "id": "CohereForAI/aya-23-35B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6461932117891638 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399551450731271 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": 
"Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4309895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33560505319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "CohereForCausalLM", - "params_billions": 34.981 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-23-8B/2ff655cd-9123-4577-832b-3f0b04f7d466.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-23-8B/2ff655cd-9123-4577-832b-3f0b04f7d466.json deleted file mode 100644 index ae8daf4a1e83770ba740b628adb863dbbefc67b6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-23-8B/2ff655cd-9123-4577-832b-3f0b04f7d466.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CohereForAI_aya-23-8B/1762652579.5050838", - "retrieved_timestamp": "1762652579.505085", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CohereForAI/aya-23-8B", - "developer": "CohereForAI", - "inference_platform": "unknown", - "id": "CohereForAI/aya-23-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4698887839820565 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4296161519220307 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3940625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2278091755319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "CohereForCausalLM", - "params_billions": 8.028 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-expanse-32b/ebbe9a61-6dff-467a-b77c-7c125a043832.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-expanse-32b/ebbe9a61-6dff-467a-b77c-7c125a043832.json deleted file mode 100644 index df351f0515c8d11e179ddb3d512495047f091e35..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-expanse-32b/ebbe9a61-6dff-467a-b77c-7c125a043832.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CohereForAI_aya-expanse-32b/1762652579.505483", - "retrieved_timestamp": "1762652579.505484", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CohereForAI/aya-expanse-32b", - "developer": "CohereForAI", - "inference_platform": "unknown", - "id": "CohereForAI/aya-expanse-32b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7301737168490716 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5648670099212114 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41298204787234044 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "CohereForCausalLM", - "params_billions": 32.296 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-expanse-8b/3d54299c-ae39-45f4-b31c-c0667dcbe9f4.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-expanse-8b/3d54299c-ae39-45f4-b31c-c0667dcbe9f4.json deleted file mode 100644 index b407718e14b2658516611acb7a6ae77e8fe885a9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-expanse-8b/3d54299c-ae39-45f4-b31c-c0667dcbe9f4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CohereForAI_aya-expanse-8b/1762652579.505729", - "retrieved_timestamp": "1762652579.5057302", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CohereForAI/aya-expanse-8b", - "developer": "CohereForAI", - "inference_platform": "unknown", - "id": "CohereForAI/aya-expanse-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6358517622131501 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4977203055736406 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37288541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3003656914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "CohereForCausalLM", - "params_billions": 8.028 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-plus-08-2024/f1ef3dda-1b62-4ec9-9c88-a8e60b8a8f6d.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-plus-08-2024/f1ef3dda-1b62-4ec9-9c88-a8e60b8a8f6d.json deleted file mode 100644 index 542a30489f4a9a04d1113baf0028b56756cb7a5d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-plus-08-2024/f1ef3dda-1b62-4ec9-9c88-a8e60b8a8f6d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/CohereForAI_c4ai-command-r-plus-08-2024/1762652579.506166", - "retrieved_timestamp": "1762652579.506167", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CohereForAI/c4ai-command-r-plus-08-2024", - "developer": "CohereForAI", - "inference_platform": "unknown", - "id": "CohereForAI/c4ai-command-r-plus-08-2024" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7539539532883859 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5995999913027185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48294791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44207114361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "CohereForCausalLM", - "params_billions": 103.811 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-plus/c5326cd1-8e73-4f84-8efb-49b3be5c50e7.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-plus/c5326cd1-8e73-4f84-8efb-49b3be5c50e7.json deleted file mode 100644 index 833b34d48bb1bd419e027e9d635f0fda275ae87f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-plus/c5326cd1-8e73-4f84-8efb-49b3be5c50e7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r-plus/1762652579.50595", - "retrieved_timestamp": "1762652579.505951", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CohereForAI/c4ai-command-r-plus", - "developer": "CohereForAI", - 
"inference_platform": "unknown", - "id": "CohereForAI/c4ai-command-r-plus" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7664186580495308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.581542357407793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48071875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3991855053191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "CohereForCausalLM", - "params_billions": 103.811 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-v01/cd24b18c-faff-44e1-87d6-735bcb9ab465.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-v01/cd24b18c-faff-44e1-87d6-735bcb9ab465.json deleted file mode 100644 index 3d141a9dfd4579f552758c3b5da92a8d3bb15545..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-v01/cd24b18c-faff-44e1-87d6-735bcb9ab465.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r-v01/1762652579.506387", - "retrieved_timestamp": "1762652579.506388", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CohereForAI/c4ai-command-r-v01", - "developer": "CohereForAI", - "inference_platform": "unknown", - "id": "CohereForAI/c4ai-command-r-v01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6748194789824333 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.5406415512767856 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45169791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3369348404255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "CohereForCausalLM", - "params_billions": 34.981 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r7b-12-2024/85fa7edb-df5c-4baa-a0f1-c520db55c08c.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r7b-12-2024/85fa7edb-df5c-4baa-a0f1-c520db55c08c.json deleted file mode 100644 index 5029fa4395e21078f3244f0516f7109e08430e6c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r7b-12-2024/85fa7edb-df5c-4baa-a0f1-c520db55c08c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r7b-12-2024/1762652579.5066051", - "retrieved_timestamp": "1762652579.506606", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CohereForAI/c4ai-command-r7b-12-2024", - "developer": "CohereForAI", - "inference_platform": "unknown", - "id": "CohereForAI/c4ai-command-r7b-12-2024" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7713145564878965 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5502642151855635 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2990936555891239 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41251041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3572140957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Cohere2ForCausalLM", - "params_billions": 8.028 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/76f198aa-0aa5-4c98-8d86-20410582d3a5.json b/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/76f198aa-0aa5-4c98-8d86-20410582d3a5.json deleted file mode 100644 index cb479a78237311135af3d5a6e38acc5ccbab19f6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/76f198aa-0aa5-4c98-8d86-20410582d3a5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/1762652579.506829", - "retrieved_timestamp": "1762652579.50683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0", - "developer": "Columbia-NLP", - "inference_platform": "unknown", - "id": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3278312654866864 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39199563613207467 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41201041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16655585106382978 - } 
- } - ], - "additional_details": { - "precision": "float16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/f39ad9a4-b02a-415e-b83a-53d705b6bea2.json b/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/f39ad9a4-b02a-415e-b83a-53d705b6bea2.json deleted file mode 100644 index b3988b3d82389607de12d9e360d59f24782ffe29..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/f39ad9a4-b02a-415e-b83a-53d705b6bea2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/1762652579.507083", - "retrieved_timestamp": "1762652579.507083", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0", - "developer": "Columbia-NLP", - "inference_platform": "unknown", - "id": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3102457036219453 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38810309159554507 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4080729166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16647273936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-sft-v1.0/0cb84d3d-4f5d-4afc-9c49-de567f2ffbcb.json b/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-sft-v1.0/0cb84d3d-4f5d-4afc-9c49-de567f2ffbcb.json deleted file mode 100644 index ddfa65d1eeb033b088ca7b55399e255c67c7308a..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-sft-v1.0/0cb84d3d-4f5d-4afc-9c49-de567f2ffbcb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-sft-v1.0/1762652579.507553", - "retrieved_timestamp": "1762652579.507553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Columbia-NLP/LION-Gemma-2b-sft-v1.0", - "developer": "Columbia-NLP", - "inference_platform": "unknown", - "id": "Columbia-NLP/LION-Gemma-2b-sft-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3692469314751526 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.387877927616119 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4027395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17819148936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-LLaMA-3-8b-dpo-v1.0/bf83f2be-f684-4ba7-b244-c5cb10f8f0b1.json b/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-LLaMA-3-8b-dpo-v1.0/bf83f2be-f684-4ba7-b244-c5cb10f8f0b1.json deleted file mode 100644 index c8e5a76262fedda51408c00187d6a7f59d5848a6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-LLaMA-3-8b-dpo-v1.0/bf83f2be-f684-4ba7-b244-c5cb10f8f0b1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-LLaMA-3-8b-dpo-v1.0/1762652579.5077918", - "retrieved_timestamp": "1762652579.507793", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" 
- }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0", - "developer": "Columbia-NLP", - "inference_platform": "unknown", - "id": "Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4957424079220912 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5028481044452986 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40971874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3218916223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-LLaMA-3-8b-sft-v1.0/23c9a71d-3504-497d-a0e2-6a5e299346e5.json b/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-LLaMA-3-8b-sft-v1.0/23c9a71d-3504-497d-a0e2-6a5e299346e5.json deleted file mode 100644 index bd254e8cfa09764353339e38c29131a5b6074a2f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-LLaMA-3-8b-sft-v1.0/23c9a71d-3504-497d-a0e2-6a5e299346e5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-LLaMA-3-8b-sft-v1.0/1762652579.5082712", - "retrieved_timestamp": "1762652579.5082722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0", - "developer": "Columbia-NLP", - "inference_platform": "unknown", - "id": "Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.38171163623629745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5087766443418147 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45027083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32372007978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CombinHorizon/CombinHorizon_YiSM-blossom5.1-34B-SLERP/91ec4ba1-6948-48e8-8db0-a335b982c560.json b/leaderboard_data/HFOpenLLMv2/CombinHorizon/CombinHorizon_YiSM-blossom5.1-34B-SLERP/91ec4ba1-6948-48e8-8db0-a335b982c560.json deleted file mode 100644 index 5fdc46d788dc4383159990d8837a5c8a6d4416bd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CombinHorizon/CombinHorizon_YiSM-blossom5.1-34B-SLERP/91ec4ba1-6948-48e8-8db0-a335b982c560.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CombinHorizon_YiSM-blossom5.1-34B-SLERP/1762652579.508977", - "retrieved_timestamp": "1762652579.508977", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CombinHorizon/YiSM-blossom5.1-34B-SLERP", - "developer": "CombinHorizon", - "inference_platform": "unknown", - "id": "CombinHorizon/YiSM-blossom5.1-34B-SLERP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033112142448702 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6207548093635428 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44134375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4740691489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ContactDoctor/ContactDoctor_Bio-Medical-3B-CoT-012025/4ad50c15-9b6d-40c8-b8ce-74253ecfe258.json b/leaderboard_data/HFOpenLLMv2/ContactDoctor/ContactDoctor_Bio-Medical-3B-CoT-012025/4ad50c15-9b6d-40c8-b8ce-74253ecfe258.json deleted file mode 100644 index 056d276ca549a4c8893cf755c7a9f75aacbcebc8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ContactDoctor/ContactDoctor_Bio-Medical-3B-CoT-012025/4ad50c15-9b6d-40c8-b8ce-74253ecfe258.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ContactDoctor_Bio-Medical-3B-CoT-012025/1762652579.509939", - "retrieved_timestamp": "1762652579.509939", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ContactDoctor/Bio-Medical-3B-CoT-012025", - "developer": "ContactDoctor", - "inference_platform": "unknown", - "id": "ContactDoctor/Bio-Medical-3B-CoT-012025" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.360379349016166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.438315337642466 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.3367604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2933843085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Corianas/Corianas_Quokka_2.7b/54015982-408c-469b-86da-6642f5708180.json b/leaderboard_data/HFOpenLLMv2/Corianas/Corianas_Quokka_2.7b/54015982-408c-469b-86da-6642f5708180.json deleted file mode 100644 index 49f57e5e2f1bf544bc41a789b5843e366f85e2f2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Corianas/Corianas_Quokka_2.7b/54015982-408c-469b-86da-6642f5708180.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Corianas_Quokka_2.7b/1762652579.5120142", - "retrieved_timestamp": "1762652579.512015", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Corianas/Quokka_2.7b", - "developer": "Corianas", - "inference_platform": "unknown", - "id": "Corianas/Quokka_2.7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17490702447284318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3055474937424842 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3908333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11452792553191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPT2LMHeadModel", - "params_billions": 2.786 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CortexLM/CortexLM_btlm-7b-base-v0.2/aded7428-1283-4ed8-b068-cc1a5ea92dca.json 
b/leaderboard_data/HFOpenLLMv2/CortexLM/CortexLM_btlm-7b-base-v0.2/aded7428-1283-4ed8-b068-cc1a5ea92dca.json deleted file mode 100644 index 5be4a7ab929338bf9d868f7343863a72532774a1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CortexLM/CortexLM_btlm-7b-base-v0.2/aded7428-1283-4ed8-b068-cc1a5ea92dca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CortexLM_btlm-7b-base-v0.2/1762652579.512528", - "retrieved_timestamp": "1762652579.512528", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CortexLM/btlm-7b-base-v0.2", - "developer": "CortexLM", - "inference_platform": "unknown", - "id": "CortexLM/btlm-7b-base-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14832865685270635 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4006411985841813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38460416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2349567819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.885 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_SCE-2-24B/f4ff02eb-7763-41bc-8a86-adbb051603af.json b/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_SCE-2-24B/f4ff02eb-7763-41bc-8a86-adbb051603af.json deleted file mode 100644 index 81adea72eb908d127880b8667dcce93ac56b0d3e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_SCE-2-24B/f4ff02eb-7763-41bc-8a86-adbb051603af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Cran-May_SCE-2-24B/1762652579.512776", - "retrieved_timestamp": "1762652579.5127769", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Cran-May/SCE-2-24B", - "developer": "Cran-May", - "inference_platform": "unknown", - "id": "Cran-May/SCE-2-24B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5865924635522636 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6264692798019763 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18957703927492447 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4528125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.461186835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_SCE-3-24B/2d7b9092-a9ad-4f47-b186-db1e1ce7cd6c.json b/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_SCE-3-24B/2d7b9092-a9ad-4f47-b186-db1e1ce7cd6c.json deleted file mode 100644 index 395ba6542d8133158852baa4cbc3b4d02811d645..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_SCE-3-24B/2d7b9092-a9ad-4f47-b186-db1e1ce7cd6c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Cran-May_SCE-3-24B/1762652579.513022", - "retrieved_timestamp": "1762652579.513023", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Cran-May/SCE-3-24B", - "developer": "Cran-May", - "inference_platform": "unknown", - "id": "Cran-May/SCE-3-24B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5465254413844156 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.597283045074691 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18806646525679757 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44347916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4646775265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_T.E-8.1/9c9e0887-5561-4789-9521-a3a78e7cfd99.json b/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_T.E-8.1/9c9e0887-5561-4789-9521-a3a78e7cfd99.json deleted file mode 100644 index fba7e4e44a0f506b5118c6c27c0c9fc7df45a576..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_T.E-8.1/9c9e0887-5561-4789-9521-a3a78e7cfd99.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Cran-May_T.E-8.1/1762652579.513231", - "retrieved_timestamp": "1762652579.513231", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Cran-May/T.E-8.1", - "developer": "Cran-May", - "inference_platform": "unknown", - "id": "Cran-May/T.E-8.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7076922565459647 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5581754708123893 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4505208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4432347074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_2/c457473c-6c40-4930-94b8-993d3b1e8937.json b/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_2/c457473c-6c40-4930-94b8-993d3b1e8937.json deleted file mode 100644 index cad42a0b7693f6f38a24913cbea13c3b4456ee4e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_2/c457473c-6c40-4930-94b8-993d3b1e8937.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Cran-May_merge_model_20250308_2/1762652579.51357", - "retrieved_timestamp": "1762652579.5135732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Cran-May/merge_model_20250308_2", - "developer": "Cran-May", - "inference_platform": "unknown", - "id": "Cran-May/merge_model_20250308_2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5932370554572978 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6585311075974459 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4793541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5419714095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_3/5448dbb6-9874-4734-8252-369c7b0189d7.json b/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_3/5448dbb6-9874-4734-8252-369c7b0189d7.json deleted file mode 100644 index 83ef5d528ff3ae4727d67b387717199039c252b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_3/5448dbb6-9874-4734-8252-369c7b0189d7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Cran-May_merge_model_20250308_3/1762652579.513911", - "retrieved_timestamp": "1762652579.513912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Cran-May/merge_model_20250308_3", - "developer": "Cran-May", - "inference_platform": "unknown", - "id": "Cran-May/merge_model_20250308_3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6017799438822324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6271459892225041 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2545317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43204166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49617686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_4/45531924-35ad-4baf-9994-5d5fa3bafd02.json b/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_4/45531924-35ad-4baf-9994-5d5fa3bafd02.json deleted file mode 100644 index 1b6f658741a0f47f7ae5fd2c1548ce0f18ba42b2..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_4/45531924-35ad-4baf-9994-5d5fa3bafd02.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Cran-May_merge_model_20250308_4/1762652579.514166", - "retrieved_timestamp": "1762652579.514167", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Cran-May/merge_model_20250308_4", - "developer": "Cran-May", - "inference_platform": "unknown", - "id": "Cran-May/merge_model_20250308_4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4539521802151624 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.666435217186487 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4199395770392749 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3976510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4688125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366522606382979 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_tempmotacilla-cinerea-0308/5e5e70f4-c597-415c-ab74-17aaf55b7b28.json b/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_tempmotacilla-cinerea-0308/5e5e70f4-c597-415c-ab74-17aaf55b7b28.json deleted file mode 100644 index a0a8a2f33bb6037e05930b79efcb82f5a5c26121..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_tempmotacilla-cinerea-0308/5e5e70f4-c597-415c-ab74-17aaf55b7b28.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Cran-May_tempmotacilla-cinerea-0308/1762652579.514418", - "retrieved_timestamp": "1762652579.5144188", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Cran-May/tempmotacilla-cinerea-0308", - "developer": "Cran-May", - "inference_platform": "unknown", - "id": "Cran-May/tempmotacilla-cinerea-0308" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8084837121061007 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6550960569488126 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5551359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42082291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5250166223404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CultriX/CultriX_Qwen2.5-14B-Wernicke-SFT/84bc884e-29be-40b5-bfe2-6147bec90a78.json b/leaderboard_data/HFOpenLLMv2/CultriX/CultriX_Qwen2.5-14B-Wernicke-SFT/84bc884e-29be-40b5-bfe2-6147bec90a78.json deleted file mode 100644 index 8e4e919a1ee211b27e694ac6040dbd345108527f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CultriX/CultriX_Qwen2.5-14B-Wernicke-SFT/84bc884e-29be-40b5-bfe2-6147bec90a78.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernicke-SFT/1762652579.520046", - "retrieved_timestamp": "1762652579.5200472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Wernicke-SFT", - "developer": "CultriX", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Wernicke-SFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4937443760333692 - } - }, - { - "evaluation_name": "BBH", 
- "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6460586236565512 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38999999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5069813829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/CultriX/CultriX_Qwestion-14B/c6ad96f2-fcb9-47c5-8106-936436b6ad1b.json b/leaderboard_data/HFOpenLLMv2/CultriX/CultriX_Qwestion-14B/c6ad96f2-fcb9-47c5-8106-936436b6ad1b.json deleted file mode 100644 index 316cecff4dfa2ca69d6900ce51cfae17e47c3d2f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/CultriX/CultriX_Qwestion-14B/c6ad96f2-fcb9-47c5-8106-936436b6ad1b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwestion-14B/1762652579.521322", - "retrieved_timestamp": "1762652579.521322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwestion-14B", - "developer": "CultriX", - "inference_platform": "unknown", - "id": "CultriX/Qwestion-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6317803428237078 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6450104739140539 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723564954682779 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46360416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.542220744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DRXD1000/DRXD1000_Atlas-7B/1f223500-a1d6-471f-b3cf-2575ab5a52c8.json b/leaderboard_data/HFOpenLLMv2/DRXD1000/DRXD1000_Atlas-7B/1f223500-a1d6-471f-b3cf-2575ab5a52c8.json deleted file mode 100644 index 0f88fcd87a4a2f2f8b62d5f807463cd99799f093..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DRXD1000/DRXD1000_Atlas-7B/1f223500-a1d6-471f-b3cf-2575ab5a52c8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DRXD1000_Atlas-7B/1762652579.5232708", - "retrieved_timestamp": "1762652579.523272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DRXD1000/Atlas-7B", - "developer": "DRXD1000", - "inference_platform": "unknown", - "id": "DRXD1000/Atlas-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3704459722425387 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3302176697760134 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33425 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14012632978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 7.768 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DRXD1000/DRXD1000_Phoenix-7B/bff80553-e91f-470e-923c-7f8103d37fca.json b/leaderboard_data/HFOpenLLMv2/DRXD1000/DRXD1000_Phoenix-7B/bff80553-e91f-470e-923c-7f8103d37fca.json deleted file mode 100644 index d99e78a451437454865f9532d3dbdb377776785b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DRXD1000/DRXD1000_Phoenix-7B/bff80553-e91f-470e-923c-7f8103d37fca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DRXD1000_Phoenix-7B/1762652579.5236301", - "retrieved_timestamp": "1762652579.523632", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DRXD1000/Phoenix-7B", - "developer": "DRXD1000", - "inference_platform": "unknown", - "id": "DRXD1000/Phoenix-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3209617149164218 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3931566034728218 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38494791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23429188829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DUAL-GPO/DUAL-GPO_zephyr-7b-ipo-0k-15k-i1/a4cd4144-75d5-4c48-a936-96d70f052a66.json b/leaderboard_data/HFOpenLLMv2/DUAL-GPO/DUAL-GPO_zephyr-7b-ipo-0k-15k-i1/a4cd4144-75d5-4c48-a936-96d70f052a66.json deleted file mode 100644 index 57113a6376a3e9aa03b34cf1b888681801225945..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DUAL-GPO/DUAL-GPO_zephyr-7b-ipo-0k-15k-i1/a4cd4144-75d5-4c48-a936-96d70f052a66.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/DUAL-GPO_zephyr-7b-ipo-0k-15k-i1/1762652579.523929", - "retrieved_timestamp": "1762652579.52393", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DUAL-GPO/zephyr-7b-ipo-0k-15k-i1", - "developer": "DUAL-GPO", - "inference_platform": "unknown", - "id": "DUAL-GPO/zephyr-7b-ipo-0k-15k-i1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27562423259174545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4472712447565954 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41734374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31299867021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 14.483 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DZgas/DZgas_GIGABATEMAN-7B/180be3a9-1d8e-4705-bda4-032bc66768c6.json b/leaderboard_data/HFOpenLLMv2/DZgas/DZgas_GIGABATEMAN-7B/180be3a9-1d8e-4705-bda4-032bc66768c6.json deleted file mode 100644 index 100874c4a213c04812f411c6e9a3c39a10ac5c6f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DZgas/DZgas_GIGABATEMAN-7B/180be3a9-1d8e-4705-bda4-032bc66768c6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DZgas_GIGABATEMAN-7B/1762652579.524226", - "retrieved_timestamp": "1762652579.5242271", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DZgas/GIGABATEMAN-7B", - "developer": "DZgas", - "inference_platform": "unknown", - "id": "DZgas/GIGABATEMAN-7B" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46074637517342876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5032184342862756 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43284374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3176529255319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherDrake-SFT/843cbaa0-5d9d-47a8-ae69-fe38a5812136.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherDrake-SFT/843cbaa0-5d9d-47a8-ae69-fe38a5812136.json deleted file mode 100644 index 89499bf6d98803a34f3a74e6a001c076f1b5b6ec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherDrake-SFT/843cbaa0-5d9d-47a8-ae69-fe38a5812136.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_AetherDrake-SFT/1762652579.524555", - "retrieved_timestamp": "1762652579.524556", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/AetherDrake-SFT", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/AetherDrake-SFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4812796712722244 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48720075507220245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1510574018126888 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40884375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34990026595744683 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherSett/791a8f9f-5c85-42e5-a06d-270118b0c7c2.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherSett/791a8f9f-5c85-42e5-a06d-270118b0c7c2.json deleted file mode 100644 index 4bb4a855620b37eb124a5122be1543b2f1c6ee17..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherSett/791a8f9f-5c85-42e5-a06d-270118b0c7c2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_AetherSett/1762652579.524883", - "retrieved_timestamp": "1762652579.524884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/AetherSett", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/AetherSett" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5369586031729146 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5451624435465484 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.46031249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4278590425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherTOT/8ac4547d-2b57-4227-a63d-05da4f3ccbc7.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherTOT/8ac4547d-2b57-4227-a63d-05da4f3ccbc7.json deleted file mode 100644 index f5bc708c8f7e8e7ec69aaad1cc723bcc1dbf12b5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherTOT/8ac4547d-2b57-4227-a63d-05da4f3ccbc7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_AetherTOT/1762652579.5251331", - "retrieved_timestamp": "1762652579.5251389", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/AetherTOT", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/AetherTOT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4397642699149368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5066056342472064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1487915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4078541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38040226063829785 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MllamaForConditionalGeneration", - "params_billions": 10.67 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherTOT/fa9282c6-7820-49dd-9893-9559c5a984a9.json 
b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherTOT/fa9282c6-7820-49dd-9893-9559c5a984a9.json deleted file mode 100644 index 1d63e8c1c32df0f3066a2879f87dd5e608ca6d61..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherTOT/fa9282c6-7820-49dd-9893-9559c5a984a9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_AetherTOT/1762652579.5253801", - "retrieved_timestamp": "1762652579.525381", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/AetherTOT", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/AetherTOT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43829040279790954 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5034307630533988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14425981873111782 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40518750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37782579787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MllamaForConditionalGeneration", - "params_billions": 10.67 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherUncensored/574d79eb-94ae-4b79-8763-77267d300670.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherUncensored/574d79eb-94ae-4b79-8763-77267d300670.json deleted file mode 100644 index 39f20c89e4731958b63949eba8b391f6833a6456..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherUncensored/574d79eb-94ae-4b79-8763-77267d300670.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_AetherUncensored/1762652579.525634", - "retrieved_timestamp": "1762652579.5256362", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/AetherUncensored", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/AetherUncensored" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40419309653940433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44631282805144945 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14501510574018128 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3746770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27102726063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Cogito-MIS/822268e0-8f66-4bb3-9d01-52c684ca281f.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Cogito-MIS/822268e0-8f66-4bb3-9d01-52c684ca281f.json deleted file mode 100644 index a56880a092b5aea3e976890c100a96814c526f33..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Cogito-MIS/822268e0-8f66-4bb3-9d01-52c684ca281f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_Cogito-MIS/1762652579.525943", - "retrieved_timestamp": "1762652579.5259452", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/Cogito-MIS", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/Cogito-MIS" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18145188100905596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5059981143086196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37676041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14353390957446807 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoDistil/f39e1ca4-2a0f-4650-886b-4160760daee5.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoDistil/f39e1ca4-2a0f-4650-886b-4160760daee5.json deleted file mode 100644 index 9cf24eb5641db9ed036dc4ab0798e950bb3fbc0e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoDistil/f39e1ca4-2a0f-4650-886b-4160760daee5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_CogitoDistil/1762652579.526295", - "retrieved_timestamp": "1762652579.5262961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/CogitoDistil", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/CogitoDistil" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27764775240805506 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36767660461416857 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.39274924471299094 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3754895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625498670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoZ/5e08074c-32bd-4ce6-a09f-7b5832cba288.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoZ/5e08074c-32bd-4ce6-a09f-7b5832cba288.json deleted file mode 100644 index 08c0282b1c942ac8758e5abb1c40bfc9e7a06445..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoZ/5e08074c-32bd-4ce6-a09f-7b5832cba288.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_CogitoZ/1762652579.5265448", - "retrieved_timestamp": "1762652579.526546", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/CogitoZ", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/CogitoZ" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3967240255854466 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6734487392645502 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241691842900302 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4792604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5592586436170213 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoZ14/024f23d8-66b0-4a7b-be01-fd68f0ab295e.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoZ14/024f23d8-66b0-4a7b-be01-fd68f0ab295e.json deleted file mode 100644 index c7e763d4700efce4bd84ba7a33c16db11cd37854..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoZ14/024f23d8-66b0-4a7b-be01-fd68f0ab295e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_CogitoZ14/1762652579.526777", - "retrieved_timestamp": "1762652579.5267782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/CogitoZ14", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/CogitoZ14" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6637034180419066 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6297514788808327 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42220543806646527 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.405875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39993351063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_DocumentCogito/6d2a742b-adde-4b6d-90d4-ebefbb2b61be.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_DocumentCogito/6d2a742b-adde-4b6d-90d4-ebefbb2b61be.json deleted file mode 100644 index 
44b579dd52cc4e8a6a0ee8cc69e120c814d553c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_DocumentCogito/6d2a742b-adde-4b6d-90d4-ebefbb2b61be.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_DocumentCogito/1762652579.5270069", - "retrieved_timestamp": "1762652579.527008", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/DocumentCogito", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/DocumentCogito" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5064340394597445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5111563719111275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16314199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3973125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38023603723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MllamaForConditionalGeneration", - "params_billions": 10.67 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_DocumentCogito/9a638bb6-f16f-496b-a974-d97dbb6cd626.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_DocumentCogito/9a638bb6-f16f-496b-a974-d97dbb6cd626.json deleted file mode 100644 index 8aeeac07f4283abb0ba9c5b411473ff6f15ed7e0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_DocumentCogito/9a638bb6-f16f-496b-a974-d97dbb6cd626.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_DocumentCogito/1762652579.527227", - "retrieved_timestamp": "1762652579.5272279", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/DocumentCogito", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/DocumentCogito" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7770349339751859 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5186726621665779 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21978851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39105208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3737533244680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MllamaForConditionalGeneration", - "params_billions": 10.67 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_MawaredT1/1e87d1ea-59df-4c1a-96da-31e12e27dab2.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_MawaredT1/1e87d1ea-59df-4c1a-96da-31e12e27dab2.json deleted file mode 100644 index da1bc48b891dbc347a9360ab36f455299a2e8800..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_MawaredT1/1e87d1ea-59df-4c1a-96da-31e12e27dab2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_MawaredT1/1762652579.527918", - "retrieved_timestamp": "1762652579.527919", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/MawaredT1", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/MawaredT1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41988036188424493 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5214815439293661 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3021148036253776 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47020833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4718251329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Mini_QwQ/7d5c59eb-c6fb-414a-9e4e-44d1d56f7401.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Mini_QwQ/7d5c59eb-c6fb-414a-9e4e-44d1d56f7401.json deleted file mode 100644 index 8aebd643552d63888a42a70978e497b702cecf36..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Mini_QwQ/7d5c59eb-c6fb-414a-9e4e-44d1d56f7401.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_Mini_QwQ/1762652579.528199", - "retrieved_timestamp": "1762652579.5282", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/Mini_QwQ", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/Mini_QwQ" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44970566984490046 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.554898906584336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41918429003021146 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46825 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.437250664893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_NemoR/a2da90e0-5f59-4c89-b819-316d2cc318be.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_NemoR/a2da90e0-5f59-4c89-b819-316d2cc318be.json deleted file mode 100644 index c6d74e255b6ba94778859b164a725213eeeaf87f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_NemoR/a2da90e0-5f59-4c89-b819-316d2cc318be.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_NemoR/1762652579.528459", - "retrieved_timestamp": "1762652579.528459", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/NemoR", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/NemoR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2287375275380435 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5194067688446361 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39080208333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32903922872340424 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathFinderAI2.0/274ab6b9-5fd7-41df-9076-b16c52947640.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathFinderAI2.0/274ab6b9-5fd7-41df-9076-b16c52947640.json deleted file mode 100644 index 7669e28dc235ca3b807becfae22194e216c01b46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathFinderAI2.0/274ab6b9-5fd7-41df-9076-b16c52947640.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_PathFinderAI2.0/1762652579.528686", - "retrieved_timestamp": "1762652579.528686", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/PathFinderAI2.0", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/PathFinderAI2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45410178326839457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.665823006477417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5075528700906344 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5546875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathFinderAi3.0/ba3924c6-f913-4094-a56a-1699f07f103c.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathFinderAi3.0/ba3924c6-f913-4094-a56a-1699f07f103c.json deleted file mode 100644 index a6c8bdb24ac1f388b3dcb809763d6f511f02f5b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathFinderAi3.0/ba3924c6-f913-4094-a56a-1699f07f103c.json +++ /dev/null @@ -1,107 +0,0 
@@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_PathFinderAi3.0/1762652579.5289202", - "retrieved_timestamp": "1762652579.5289202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/PathFinderAi3.0", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/PathFinderAi3.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42709898624538445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6884221416328996 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4085570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4806875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5757147606382979 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathfinderAI/445f2c79-2c47-465c-ace7-73b3fa491454.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathfinderAI/445f2c79-2c47-465c-ace7-73b3fa491454.json deleted file mode 100644 index 9badb0279a37da0abd6f279a2886bb475b1850e8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathfinderAI/445f2c79-2c47-465c-ace7-73b3fa491454.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_PathfinderAI/1762652579.529176", - "retrieved_timestamp": "1762652579.5291772", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/PathfinderAI", - "developer": "Daemontatox", - "inference_platform": 
"unknown", - "id": "Daemontatox/PathfinderAI" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37451739163198094 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6667854331232542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47583081570996977 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48583333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.559341755319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathfinderAI/c07f2943-f3f4-46be-993e-be56dadcb561.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathfinderAI/c07f2943-f3f4-46be-993e-be56dadcb561.json deleted file mode 100644 index ac333a09b04eebbb8ac83590878a522b9325679c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathfinderAI/c07f2943-f3f4-46be-993e-be56dadcb561.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_PathfinderAI/1762652579.5294342", - "retrieved_timestamp": "1762652579.529435", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/PathfinderAI", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/PathfinderAI" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4855006937148987 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6627335380624046 - } - }, - { - "evaluation_name": 
"MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42559375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.554188829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PixelParse_AI/29459932-a7a5-458f-9778-e236cc4ea985.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PixelParse_AI/29459932-a7a5-458f-9778-e236cc4ea985.json deleted file mode 100644 index aa668632a96e425e81313f49787c4662a928c701..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PixelParse_AI/29459932-a7a5-458f-9778-e236cc4ea985.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_PixelParse_AI/1762652579.529871", - "retrieved_timestamp": "1762652579.529872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/PixelParse_AI", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/PixelParse_AI" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43829040279790954 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5034307630533988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1472809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40518750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37782579787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MllamaForConditionalGeneration", - "params_billions": 10.67 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA2.0/3baf9882-5625-47eb-a88b-b172dfc9a330.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA2.0/3baf9882-5625-47eb-a88b-b172dfc9a330.json deleted file mode 100644 index 51186af7249f1e821a4b000f8f773e47be35c058..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA2.0/3baf9882-5625-47eb-a88b-b172dfc9a330.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_RA2.0/1762652579.53008", - "retrieved_timestamp": "1762652579.530081", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/RA2.0", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/RA2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37838934028378035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4888687006782508 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38368580060422963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40912499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26163563829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA_Reasoner/ab74d5ca-6c80-44de-96e9-af61861090b6.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA_Reasoner/ab74d5ca-6c80-44de-96e9-af61861090b6.json deleted file mode 100644 index b1f4c3ee011c7239750543b2b8c11f26c94a4304..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA_Reasoner/ab74d5ca-6c80-44de-96e9-af61861090b6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_RA_Reasoner/1762652579.530283", - "retrieved_timestamp": "1762652579.530284", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/RA_Reasoner", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/RA_Reasoner" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.559215104810791 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6053692417205033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2122356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3963541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43001994680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA_Reasoner2.0/5cf9872a-6d67-4b42-bfe4-abad05bdd9cf.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA_Reasoner2.0/5cf9872a-6d67-4b42-bfe4-abad05bdd9cf.json deleted file mode 100644 index e5a5769990591bf5eac130471bb3aded5ea3c89a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA_Reasoner2.0/5cf9872a-6d67-4b42-bfe4-abad05bdd9cf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_RA_Reasoner2.0/1762652579.530484", - "retrieved_timestamp": 
"1762652579.530485", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/RA_Reasoner2.0", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/RA_Reasoner2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366339091388627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6062469551969276 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3883541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4353390957446808 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_ReasonTest/39d481bf-ea86-42a7-a6f1-ce38ce9dce30.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_ReasonTest/39d481bf-ea86-42a7-a6f1-ce38ce9dce30.json deleted file mode 100644 index 2eae2e44d4fec5070c7fd4fe19c2eb0054ed1016..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_ReasonTest/39d481bf-ea86-42a7-a6f1-ce38ce9dce30.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_ReasonTest/1762652579.530685", - "retrieved_timestamp": "1762652579.530686", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/ReasonTest", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/ReasonTest" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4079653098223824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.543526397621609 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21374622356495468 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43154166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4271941489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.808 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Research_PathfinderAI/900e5686-083d-460c-918f-06a39936810c.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Research_PathfinderAI/900e5686-083d-460c-918f-06a39936810c.json deleted file mode 100644 index 57dbc59cbdd0d4e1448f354195d071a3734c4a5b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Research_PathfinderAI/900e5686-083d-460c-918f-06a39936810c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_Research_PathfinderAI/1762652579.530894", - "retrieved_timestamp": "1762652579.530895", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/Research_PathfinderAI", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/Research_PathfinderAI" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3456916537010687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287225755504323 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16993957703927492 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33939583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11303191489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Zirel-7B-Math/460de6c8-d706-420b-9c0a-a108ddb11e5f.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Zirel-7B-Math/460de6c8-d706-420b-9c0a-a108ddb11e5f.json deleted file mode 100644 index 6abcd7894505d59eecadbe11dbcdee1e3c04fa3a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Zirel-7B-Math/460de6c8-d706-420b-9c0a-a108ddb11e5f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_Zirel-7B-Math/1762652579.531958", - "retrieved_timestamp": "1762652579.531959", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/Zirel-7B-Math", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/Zirel-7B-Math" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6638785090227264 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5447698777469486 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19788519637462235 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.47891666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4237034574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Zirel_1.5/661e2393-2560-4d25-a6f3-f0d680052e8e.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Zirel_1.5/661e2393-2560-4d25-a6f3-f0d680052e8e.json deleted file mode 100644 index 44e7be9b8700430174c750f97faedbad79929f10..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Zirel_1.5/661e2393-2560-4d25-a6f3-f0d680052e8e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_Zirel_1.5/1762652579.532257", - "retrieved_timestamp": "1762652579.532258", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/Zirel_1.5", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/Zirel_1.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4167575366693706 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3984669254999634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36581250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21434507978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_mini-Cogito-R1/faac8ed1-1042-42dc-9762-3f90161fb34f.json 
b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_mini-Cogito-R1/faac8ed1-1042-42dc-9762-3f90161fb34f.json deleted file mode 100644 index c050631b8b4e8fd0e517de6e6ce45d6cab02afaa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_mini-Cogito-R1/faac8ed1-1042-42dc-9762-3f90161fb34f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_mini-Cogito-R1/1762652579.532486", - "retrieved_timestamp": "1762652579.532487", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/mini-Cogito-R1", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/mini-Cogito-R1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2298368329366082 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3280491875175077 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27492447129909364 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34469791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14818816489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_mini_Pathfinder/a9afd0b3-8189-47e0-9e33-d60540679e20.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_mini_Pathfinder/a9afd0b3-8189-47e0-9e33-d60540679e20.json deleted file mode 100644 index 6dbd700d9e57495ec735787cbcf88c0398d5432f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_mini_Pathfinder/a9afd0b3-8189-47e0-9e33-d60540679e20.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_mini_Pathfinder/1762652579.53272", - "retrieved_timestamp": "1762652579.5327208", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/mini_Pathfinder", - "developer": "Daemontatox", - "inference_platform": "unknown", - "id": "Daemontatox/mini_Pathfinder" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29615752869054107 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39556911910803755 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47507552870090636 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37809374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28091755319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Dampfinchen/Dampfinchen_Llama-3.1-8B-Ultra-Instruct/2a0d23aa-47ae-4974-ac64-5371097a1b0f.json b/leaderboard_data/HFOpenLLMv2/Dampfinchen/Dampfinchen_Llama-3.1-8B-Ultra-Instruct/2a0d23aa-47ae-4974-ac64-5371097a1b0f.json deleted file mode 100644 index 4fdc019de8e0304a878d768bf9f1e5220f0f5a45..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Dampfinchen/Dampfinchen_Llama-3.1-8B-Ultra-Instruct/2a0d23aa-47ae-4974-ac64-5371097a1b0f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Dampfinchen_Llama-3.1-8B-Ultra-Instruct/1762652579.532935", - "retrieved_timestamp": "1762652579.532935", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Dampfinchen/Llama-3.1-8B-Ultra-Instruct", - "developer": "Dampfinchen", - "inference_platform": "unknown", - "id": "Dampfinchen/Llama-3.1-8B-Ultra-Instruct" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8081091503876381 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5257532452246574 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40032291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.382563164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Danielbrdz/Danielbrdz_Barcenas-10b/acdaefdc-b28c-4081-bf72-517d6c70595e.json b/leaderboard_data/HFOpenLLMv2/Danielbrdz/Danielbrdz_Barcenas-10b/acdaefdc-b28c-4081-bf72-517d6c70595e.json deleted file mode 100644 index 07da5b577a7f97ba31620c44e21599c33d93fd00..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Danielbrdz/Danielbrdz_Barcenas-10b/acdaefdc-b28c-4081-bf72-517d6c70595e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-10b/1762652579.533203", - "retrieved_timestamp": "1762652579.533203", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Danielbrdz/Barcenas-10b", - "developer": "Danielbrdz", - "inference_platform": "unknown", - "id": "Danielbrdz/Barcenas-10b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6607811717354397 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6120828494270083 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41346875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4360871010638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Danielbrdz/Danielbrdz_Barcenas-3b-GRPO/88a3b40a-3ba2-4f13-bd8c-110872d807c7.json b/leaderboard_data/HFOpenLLMv2/Danielbrdz/Danielbrdz_Barcenas-3b-GRPO/88a3b40a-3ba2-4f13-bd8c-110872d807c7.json deleted file mode 100644 index da2f614a900a19abf1fde269a17668205a6a3f98..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Danielbrdz/Danielbrdz_Barcenas-3b-GRPO/88a3b40a-3ba2-4f13-bd8c-110872d807c7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-3b-GRPO/1762652579.534181", - "retrieved_timestamp": "1762652579.5341818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Danielbrdz/Barcenas-3b-GRPO", - "developer": "Danielbrdz", - "inference_platform": "unknown", - "id": "Danielbrdz/Barcenas-3b-GRPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5444276741268723 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44143515175110304 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13746223564954682 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.35759375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036901595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_12b-mn-dans-reasoning-test-2/bd21f54f-6b0c-4db9-bb46-7a4c60f960ae.json b/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_12b-mn-dans-reasoning-test-2/bd21f54f-6b0c-4db9-bb46-7a4c60f960ae.json deleted file mode 100644 index 6074f9aa8ac0ab6f596ac9194ff17d257334fd46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_12b-mn-dans-reasoning-test-2/bd21f54f-6b0c-4db9-bb46-7a4c60f960ae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_12b-mn-dans-reasoning-test-2/1762652579.534956", - "retrieved_timestamp": "1762652579.5349572", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Dans-DiscountModels/12b-mn-dans-reasoning-test-2", - "developer": "Dans-DiscountModels", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3710953603106424 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48070333147041405 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2507480053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No 
newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_12b-mn-dans-reasoning-test-3/c9dedad4-65d4-479e-b465-912cd8885e32.json b/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_12b-mn-dans-reasoning-test-3/c9dedad4-65d4-479e-b465-912cd8885e32.json deleted file mode 100644 index 4348b94528907fad2837cb756cced3e3c9e7249b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_12b-mn-dans-reasoning-test-3/c9dedad4-65d4-479e-b465-912cd8885e32.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_12b-mn-dans-reasoning-test-3/1762652579.535208", - "retrieved_timestamp": "1762652579.535209", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Dans-DiscountModels/12b-mn-dans-reasoning-test-3", - "developer": "Dans-DiscountModels", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5052593784491815 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48388753289945696 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4167604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2515791223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-CoreCurriculum-12b-ChatML/6b61018c-249d-482b-a787-06f1e6514f29.json b/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-CoreCurriculum-12b-ChatML/6b61018c-249d-482b-a787-06f1e6514f29.json deleted file mode 100644 index 
1582ce7e8e062365d44a8e566bfc078de41d5eb0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-CoreCurriculum-12b-ChatML/6b61018c-249d-482b-a787-06f1e6514f29.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-CoreCurriculum-12b-ChatML/1762652579.535429", - "retrieved_timestamp": "1762652579.53543", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML", - "developer": "Dans-DiscountModels", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21110209798889168 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4791864789096407 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3606354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2805019946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.0/9873b58d-1ffd-44a7-bb93-15038986419a.json b/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.0/9873b58d-1ffd-44a7-bb93-15038986419a.json deleted file mode 100644 index f43831e00ae44449bb56486de942545d76fa00aa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.0/9873b58d-1ffd-44a7-bb93-15038986419a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.0/1762652579.5358772", - "retrieved_timestamp": "1762652579.535878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0", - "developer": "Dans-DiscountModels", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06682048076880455 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47747656219777285 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3785833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.328374335106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.1/71656625-cd85-49a6-a8df-abc0b9c0ae5d.json b/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.1/71656625-cd85-49a6-a8df-abc0b9c0ae5d.json deleted file mode 100644 index 89fc7ebcd4231480ba4bfc8877b3a64f93bd8487..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.1/71656625-cd85-49a6-a8df-abc0b9c0ae5d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.1/1762652579.5360918", - "retrieved_timestamp": "1762652579.5360918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1", - "developer": "Dans-DiscountModels", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09105063453857985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4748653313732898 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3824895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.327875664893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.2.0/d47dc284-0ed6-4853-8a54-b87b4b529150.json b/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.2.0/d47dc284-0ed6-4853-8a54-b87b4b529150.json deleted file mode 100644 index 046914817ca86d1e41fbbc203e5fbf827a79f08c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.2.0/d47dc284-0ed6-4853-8a54-b87b4b529150.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.2.0/1762652579.536302", - "retrieved_timestamp": "1762652579.536303", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0", - "developer": "Dans-DiscountModels", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5064085515321569 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4624263551503409 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3644479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2999501329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML/60db255b-d34c-4f33-91a4-279a9ccc6791.json b/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML/60db255b-d34c-4f33-91a4-279a9ccc6791.json deleted file mode 100644 index e3694327583b19fbc5e0f71b3b98e73c0a1662d9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML/60db255b-d34c-4f33-91a4-279a9ccc6791.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML/1762652579.5356538", - "retrieved_timestamp": "1762652579.535655", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML", - "developer": "Dans-DiscountModels", - "inference_platform": "unknown", - "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08250774611364513 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4738171816307924 
- } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3918229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32878989361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Darkknight535/Darkknight535_OpenCrystal-12B-L3/8edb0a0d-994b-4b97-b9a7-7f46ba0e7365.json b/leaderboard_data/HFOpenLLMv2/Darkknight535/Darkknight535_OpenCrystal-12B-L3/8edb0a0d-994b-4b97-b9a7-7f46ba0e7365.json deleted file mode 100644 index 973da37f6a722e4af835e781b0b30f8ca2e6cb63..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Darkknight535/Darkknight535_OpenCrystal-12B-L3/8edb0a0d-994b-4b97-b9a7-7f46ba0e7365.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Darkknight535_OpenCrystal-12B-L3/1762652579.5369642", - "retrieved_timestamp": "1762652579.5369651", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Darkknight535/OpenCrystal-12B-L3", - "developer": "Darkknight535", - "inference_platform": "unknown", - "id": "Darkknight535/OpenCrystal-12B-L3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070909630890482 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5222598504945516 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36565625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640292553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 11.52 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-DARKEST-PLANET-16.5B/2c317db5-86fa-41fd-8f1e-3cf08ba91cde.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-DARKEST-PLANET-16.5B/2c317db5-86fa-41fd-8f1e-3cf08ba91cde.json deleted file mode 100644 index b3f9b01688524f477e3dbc6d3ca34b18a0b6a415..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-DARKEST-PLANET-16.5B/2c317db5-86fa-41fd-8f1e-3cf08ba91cde.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_L3-DARKEST-PLANET-16.5B/1762652579.540939", - "retrieved_timestamp": "1762652579.54094", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/L3-DARKEST-PLANET-16.5B", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3-DARKEST-PLANET-16.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6230623634179533 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5230436906708896 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.363031914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 16.537 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Dark-Planet-8B/f5c2a2cc-392e-4337-aad9-72d65ba87aab.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Dark-Planet-8B/f5c2a2cc-392e-4337-aad9-72d65ba87aab.json deleted file mode 100644 index 774dc40102833cfff9d5f3368300edbcd08287c6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Dark-Planet-8B/f5c2a2cc-392e-4337-aad9-72d65ba87aab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_L3-Dark-Planet-8B/1762652579.5412621", - "retrieved_timestamp": "1762652579.541263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/L3-Dark-Planet-8B", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3-Dark-Planet-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4134108609600305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084081453197787 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0823262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36159375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37367021276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Jamet-12.2B-MK.V-Blackroot-Instruct/85a1ef3f-7d68-4324-876d-b52cfa71317d.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Jamet-12.2B-MK.V-Blackroot-Instruct/85a1ef3f-7d68-4324-876d-b52cfa71317d.json deleted file mode 100644 index 73c09edee4d6b44f40073ca3cd5ba96e8aefa637..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Jamet-12.2B-MK.V-Blackroot-Instruct/85a1ef3f-7d68-4324-876d-b52cfa71317d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_L3-Jamet-12.2B-MK.V-Blackroot-Instruct/1762652579.541475", - "retrieved_timestamp": "1762652579.541475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3961998608137519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4765717717789398 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40196875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3291223404255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 12.174 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Lumimaid-12.2B-v0.1-OAS-Instruct/a8fe768d-f988-4fba-be80-2f5cc22dfd9d.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Lumimaid-12.2B-v0.1-OAS-Instruct/a8fe768d-f988-4fba-be80-2f5cc22dfd9d.json deleted file mode 100644 index d1deb236f4eb03e544afecc0b8b78eb4b56bbc18..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Lumimaid-12.2B-v0.1-OAS-Instruct/a8fe768d-f988-4fba-be80-2f5cc22dfd9d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_L3-Lumimaid-12.2B-v0.1-OAS-Instruct/1762652579.541698", - "retrieved_timestamp": "1762652579.5416992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - 
"model_info": { - "name": "DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3924032677739509 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46930207579694677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41942708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31416223404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 12.174 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-SMB-Instruct-12.2B-F32/970cfd49-b72c-4cf5-af05-1ecfc57c94d8.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-SMB-Instruct-12.2B-F32/970cfd49-b72c-4cf5-af05-1ecfc57c94d8.json deleted file mode 100644 index 3f9b310d14bb302f5557a3bab7f3523e491422ec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-SMB-Instruct-12.2B-F32/970cfd49-b72c-4cf5-af05-1ecfc57c94d8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_L3-SMB-Instruct-12.2B-F32/1762652579.541919", - "retrieved_timestamp": "1762652579.54192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/L3-SMB-Instruct-12.2B-F32", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3-SMB-Instruct-12.2B-F32" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303215468290802 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4786412360346213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40872916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3312001329787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 12.174 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/9dbf220a-cbe9-40da-814f-951205c3abbe.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/9dbf220a-cbe9-40da-814f-951205c3abbe.json deleted file mode 100644 index bc0f245cc6b872e493213f17007d87d86f19a305..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/9dbf220a-cbe9-40da-814f-951205c3abbe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/1762652579.542142", - "retrieved_timestamp": "1762652579.5421429", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34389309254998957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4736328900737677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": 
{ - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40311458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3570478723404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 16.537 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Stheno-v3.2-12.2B-Instruct/51566db6-56e4-40bd-a248-6c968f2b83e8.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Stheno-v3.2-12.2B-Instruct/51566db6-56e4-40bd-a248-6c968f2b83e8.json deleted file mode 100644 index 51769c270fad4b7ac981fc751ed1018861b5bfab..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Stheno-v3.2-12.2B-Instruct/51566db6-56e4-40bd-a248-6c968f2b83e8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_L3-Stheno-v3.2-12.2B-Instruct/1762652579.542359", - "retrieved_timestamp": "1762652579.54236", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/L3-Stheno-v3.2-12.2B-Instruct", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3-Stheno-v3.2-12.2B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4027945850343755 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4845980190500647 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41025 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3345246010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 12.174 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3.1-Dark-Planet-SpinFire-Uncensored-8B/0982d599-57c7-4eeb-bd47-844879bb79a5.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3.1-Dark-Planet-SpinFire-Uncensored-8B/0982d599-57c7-4eeb-bd47-844879bb79a5.json deleted file mode 100644 index 62dccdf4b20a61949cfd222efaf336e719b2eef1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3.1-Dark-Planet-SpinFire-Uncensored-8B/0982d599-57c7-4eeb-bd47-844879bb79a5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_L3.1-Dark-Planet-SpinFire-Uncensored-8B/1762652579.542578", - "retrieved_timestamp": "1762652579.542578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7042702252246262 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5260910165037093 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.354125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3670212765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/a7df9a84-fa29-4c8e-8413-4542b5eafb63.json 
b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/a7df9a84-fa29-4c8e-8413-4542b5eafb63.json deleted file mode 100644 index 8c60a12c1eef707aa88f089030ef447aa0f4eeb2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/a7df9a84-fa29-4c8e-8413-4542b5eafb63.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/1762652579.542795", - "retrieved_timestamp": "1762652579.5427961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B", - "developer": "DavidAU", - "inference_platform": "unknown", - "id": "DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3345257250761313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4420822344441435 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26057401812688824 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37486458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2892287234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 13.668 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Davidsv/Davidsv_SUONG-1/097e6cbe-88cd-4d61-bb4c-0b8ddb537abe.json b/leaderboard_data/HFOpenLLMv2/Davidsv/Davidsv_SUONG-1/097e6cbe-88cd-4d61-bb4c-0b8ddb537abe.json deleted file mode 100644 index d9d0aaaa6aeeac787216b50add0eef9fc7d772c1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Davidsv/Davidsv_SUONG-1/097e6cbe-88cd-4d61-bb4c-0b8ddb537abe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Davidsv_SUONG-1/1762652579.5439382", - "retrieved_timestamp": "1762652579.54394", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Davidsv/SUONG-1", - "developer": "Davidsv", - "inference_platform": "unknown", - "id": "Davidsv/SUONG-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2497207409673001 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28171339082318814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1085438829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 2.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DavieLion/DavieLion_Lllma-3.2-1B/274ed35b-4abe-4f20-bd18-7e386a7fdaa5.json b/leaderboard_data/HFOpenLLMv2/DavieLion/DavieLion_Lllma-3.2-1B/274ed35b-4abe-4f20-bd18-7e386a7fdaa5.json deleted file mode 100644 index f49a90f712c907d792bf5007851dd9cd0dee4c1c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DavieLion/DavieLion_Lllma-3.2-1B/274ed35b-4abe-4f20-bd18-7e386a7fdaa5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavieLion_Lllma-3.2-1B/1762652579.5458188", - "retrieved_timestamp": "1762652579.54582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavieLion/Lllma-3.2-1B", - "developer": "DavieLion", - "inference_platform": "unknown", - "id": "DavieLion/Lllma-3.2-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1601439735457475 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2964692268500723 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35781250000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11261635638297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DebateLabKIT/DebateLabKIT_Llama-3.1-Argunaut-1-8B-SFT/ea40f65f-60a8-4efa-aa8d-e2a64ef5999f.json b/leaderboard_data/HFOpenLLMv2/DebateLabKIT/DebateLabKIT_Llama-3.1-Argunaut-1-8B-SFT/ea40f65f-60a8-4efa-aa8d-e2a64ef5999f.json deleted file mode 100644 index b2b91ce5cbe886602e468a0947edf2e865a92e40..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DebateLabKIT/DebateLabKIT_Llama-3.1-Argunaut-1-8B-SFT/ea40f65f-60a8-4efa-aa8d-e2a64ef5999f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DebateLabKIT_Llama-3.1-Argunaut-1-8B-SFT/1762652579.546083", - "retrieved_timestamp": "1762652579.5460842", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT", - "developer": "DebateLabKIT", - "inference_platform": "unknown", - "id": "DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.551921124837653 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48238301936695316 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14501510574018128 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4503020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3472406914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Deci/Deci_DeciLM-7B-instruct/1b3a2041-d14f-44d1-9efd-dbeceaa67ee6.json b/leaderboard_data/HFOpenLLMv2/Deci/Deci_DeciLM-7B-instruct/1b3a2041-d14f-44d1-9efd-dbeceaa67ee6.json deleted file mode 100644 index e10f9312727e123fb8de6df98d10cf9c9ec5388f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Deci/Deci_DeciLM-7B-instruct/1b3a2041-d14f-44d1-9efd-dbeceaa67ee6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Deci_DeciLM-7B-instruct/1762652579.546672", - "retrieved_timestamp": "1762652579.546672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Deci/DeciLM-7B-instruct", - "developer": "Deci", - "inference_platform": "unknown", - "id": "Deci/DeciLM-7B-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4880239985460799 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4589748654047652 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38841666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26080452127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "DeciLMForCausalLM", - "params_billions": 7.044 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Deci/Deci_DeciLM-7B/f9d2408b-03dd-4cf8-851e-51a15ff13be9.json b/leaderboard_data/HFOpenLLMv2/Deci/Deci_DeciLM-7B/f9d2408b-03dd-4cf8-851e-51a15ff13be9.json deleted file mode 100644 index 72de39763a1e08c2dd34b355bcd21d0585336e5d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Deci/Deci_DeciLM-7B/f9d2408b-03dd-4cf8-851e-51a15ff13be9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Deci_DeciLM-7B/1762652579.5463831", - "retrieved_timestamp": "1762652579.5463839", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Deci/DeciLM-7B", - "developer": "Deci", - "inference_platform": "unknown", - "id": "Deci/DeciLM-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28129474239462404 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44228566674266495 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43585416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26919880319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "DeciLMForCausalLM", - "params_billions": 7.044 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_d2nwg_Llama-3.1-8B-Instruct-v0.0/d5d73b84-4436-47bf-967e-c9be94898189.json 
b/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_d2nwg_Llama-3.1-8B-Instruct-v0.0/d5d73b84-4436-47bf-967e-c9be94898189.json deleted file mode 100644 index f6f1d86a600c4a3c3acafb3b343b4cf888bcc431..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_d2nwg_Llama-3.1-8B-Instruct-v0.0/d5d73b84-4436-47bf-967e-c9be94898189.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_d2nwg_Llama-3.1-8B-Instruct-v0.0/1762652579.548984", - "retrieved_timestamp": "1762652579.548985", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0", - "developer": "DeepAutoAI", - "inference_platform": "unknown", - "id": "DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7892746800711002 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5080411642065981 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41346875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3877160904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.0/fb8eb882-26a9-4008-9226-90d44d38b54f.json b/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.0/fb8eb882-26a9-4008-9226-90d44d38b54f.json deleted file mode 100644 index ee036c49c9d75ce1e94bd8e5daa09c36caccf8be..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.0/fb8eb882-26a9-4008-9226-90d44d38b54f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.0/1762652579.5500422", - "retrieved_timestamp": "1762652579.5500429", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0", - "developer": "DeepAutoAI", - "inference_platform": "unknown", - "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7889499860370484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5125175335277464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41213541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38954454787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.1/a7ba1534-464f-45ba-834f-5f501b155c20.json b/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.1/a7ba1534-464f-45ba-834f-5f501b155c20.json deleted file mode 100644 index 3f234812361c46cdbf717d9e98f693008a4a8555..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.1/a7ba1534-464f-45ba-834f-5f501b155c20.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.1/1762652579.550273", - "retrieved_timestamp": "1762652579.5502741", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - 
"name": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1", - "developer": "DeepAutoAI", - "inference_platform": "unknown", - "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7889499860370484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5125175335277464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41213541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38954454787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Lite-3B/5eb28bbd-8428-4385-b078-13e8a868e9f0.json b/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Lite-3B/5eb28bbd-8428-4385-b078-13e8a868e9f0.json deleted file mode 100644 index 3fe0eddfe8e698793350f7d097c3a0aebde79a44..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Lite-3B/5eb28bbd-8428-4385-b078-13e8a868e9f0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepMount00_Lexora-Lite-3B/1762652579.550504", - "retrieved_timestamp": "1762652579.550505", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepMount00/Lexora-Lite-3B", - "developer": "DeepMount00", - "inference_platform": "unknown", - "id": "DeepMount00/Lexora-Lite-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5775996577968678 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4873392373334518 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39660416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3602061170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Lite-3B_v2/bf38278f-6375-41a6-9744-04fb4a32ed72.json b/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Lite-3B_v2/bf38278f-6375-41a6-9744-04fb4a32ed72.json deleted file mode 100644 index aa9a7d93c9286d227da45d3674b474e44a206ab9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Lite-3B_v2/bf38278f-6375-41a6-9744-04fb4a32ed72.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepMount00_Lexora-Lite-3B_v2/1762652579.550789", - "retrieved_timestamp": "1762652579.550789", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepMount00/Lexora-Lite-3B_v2", - "developer": "DeepMount00", - "inference_platform": "unknown", - "id": "DeepMount00/Lexora-Lite-3B_v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49431840848947456 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48117654754683153 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2280966767371601 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35438829787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Medium-7B/cc8f594a-e2f7-49e3-8654-57f1b397797f.json b/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Medium-7B/cc8f594a-e2f7-49e3-8654-57f1b397797f.json deleted file mode 100644 index 290a6767699d2bf141a2f1fbc6a38a470e15af5d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Medium-7B/cc8f594a-e2f7-49e3-8654-57f1b397797f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepMount00_Lexora-Medium-7B/1762652579.551008", - "retrieved_timestamp": "1762652579.551009", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepMount00/Lexora-Medium-7B", - "developer": "DeepMount00", - "inference_platform": "unknown", - "id": "DeepMount00/Lexora-Medium-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4103379034295669 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5144844494250328 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22205438066465258 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44394791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43251329787234044 - } - } 
- ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Qwen2.5-7B-Instruct-MathCoder/ea1a36fb-66c0-4b1a-bdac-7ec2602a7c65.json b/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Qwen2.5-7B-Instruct-MathCoder/ea1a36fb-66c0-4b1a-bdac-7ec2602a7c65.json deleted file mode 100644 index c97063c1bec61605b530e86b10955d7ed580d98b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Qwen2.5-7B-Instruct-MathCoder/ea1a36fb-66c0-4b1a-bdac-7ec2602a7c65.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2.5-7B-Instruct-MathCoder/1762652579.55323", - "retrieved_timestamp": "1762652579.553231", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepMount00/Qwen2.5-7B-Instruct-MathCoder", - "developer": "DeepMount00", - "inference_platform": "unknown", - "id": "DeepMount00/Qwen2.5-7B-Instruct-MathCoder" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15302508455342934 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2998444769655102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178523936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_mergekit-ties-okvgjfz/34350829-d42d-4e67-b23f-171044428c1f.json b/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_mergekit-ties-okvgjfz/34350829-d42d-4e67-b23f-171044428c1f.json deleted file mode 100644 index 94e14b85d87033ded181f096b4cb4d5f9dd8dfde..0000000000000000000000000000000000000000 
--- a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_mergekit-ties-okvgjfz/34350829-d42d-4e67-b23f-171044428c1f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepMount00_mergekit-ties-okvgjfz/1762652579.5535848", - "retrieved_timestamp": "1762652579.553586", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepMount00/mergekit-ties-okvgjfz", - "developer": "DeepMount00", - "inference_platform": "unknown", - "id": "DeepMount00/mergekit-ties-okvgjfz" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15302508455342934 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2998444769655102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178523936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Baldur-8B/6267c5c6-abd3-4eb0-94ca-5c569414e7a9.json b/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Baldur-8B/6267c5c6-abd3-4eb0-94ca-5c569414e7a9.json deleted file mode 100644 index fd3a3c053ddb61bac42892d36dcb302cf424ac2a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Baldur-8B/6267c5c6-abd3-4eb0-94ca-5c569414e7a9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Delta-Vector_Baldur-8B/1762652579.5538838", - "retrieved_timestamp": "1762652579.553885", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Delta-Vector/Baldur-8B", - "developer": "Delta-Vector", - "inference_platform": "unknown", - "id": "Delta-Vector/Baldur-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47818233398493776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5305842954529679 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43715624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3654421542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Control-8B-V1.1/20796a87-8691-44b9-9b60-85ad3c7f4b7b.json b/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Control-8B-V1.1/20796a87-8691-44b9-9b60-85ad3c7f4b7b.json deleted file mode 100644 index a312468cb7d9076264dc84a7dcc0d0876e67ebf8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Control-8B-V1.1/20796a87-8691-44b9-9b60-85ad3c7f4b7b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Delta-Vector_Control-8B-V1.1/1762652579.5543838", - "retrieved_timestamp": "1762652579.554385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Delta-Vector/Control-8B-V1.1", - "developer": "Delta-Vector", - "inference_platform": "unknown", - "id": "Delta-Vector/Control-8B-V1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5696562897556262 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49928406748541837 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42372916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37450132978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Control-8B/26dc4843-56a7-45b5-a61a-386e260574a2.json b/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Control-8B/26dc4843-56a7-45b5-a61a-386e260574a2.json deleted file mode 100644 index 007a2da14d07b351b27e03b573c235d5a4c4df78..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Control-8B/26dc4843-56a7-45b5-a61a-386e260574a2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Delta-Vector_Control-8B/1762652579.554166", - "retrieved_timestamp": "1762652579.554166", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Delta-Vector/Control-8B", - "developer": "Delta-Vector", - "inference_platform": "unknown", - "id": "Delta-Vector/Control-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5489733906035985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5041458754993735 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13897280966767372 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43554166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3731715425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Darkens-8B/a1689935-8ccb-49a8-8c2a-8dbf32b7ac02.json b/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Darkens-8B/a1689935-8ccb-49a8-8c2a-8dbf32b7ac02.json deleted file mode 100644 index 49c77e5d39d5d676bf0df8a11453db078859104d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Darkens-8B/a1689935-8ccb-49a8-8c2a-8dbf32b7ac02.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Delta-Vector_Darkens-8B/1762652579.5545971", - "retrieved_timestamp": "1762652579.5545971", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Delta-Vector/Darkens-8B", - "developer": "Delta-Vector", - "inference_platform": "unknown", - "id": "Delta-Vector/Darkens-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25476624245889795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5250590567372793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4105520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3735871010638298 - } - } 
- ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.414 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Henbane-7b-attempt2/73f9a017-15ac-42e6-9600-69b411de4086.json b/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Henbane-7b-attempt2/73f9a017-15ac-42e6-9600-69b411de4086.json deleted file mode 100644 index a137c4b605875b3ce3b8562c979c54b9af373e70..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Henbane-7b-attempt2/73f9a017-15ac-42e6-9600-69b411de4086.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Delta-Vector_Henbane-7b-attempt2/1762652579.55481", - "retrieved_timestamp": "1762652579.55481", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Delta-Vector/Henbane-7b-attempt2", - "developer": "Delta-Vector", - "inference_platform": "unknown", - "id": "Delta-Vector/Henbane-7b-attempt2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4157335868828043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5061177974093075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22734138972809667 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39734375000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4027593085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Odin-9B/586d4e20-c1f4-466a-8488-07ac18ad6253.json b/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Odin-9B/586d4e20-c1f4-466a-8488-07ac18ad6253.json deleted file mode 100644 index 9d3f2912a311f456826ad4d1baf7c0c02c19b943..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Odin-9B/586d4e20-c1f4-466a-8488-07ac18ad6253.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Delta-Vector_Odin-9B/1762652579.555037", - "retrieved_timestamp": "1762652579.555038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Delta-Vector/Odin-9B", - "developer": "Delta-Vector", - "inference_platform": "unknown", - "id": "Delta-Vector/Odin-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3691970637907419 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5440253444823155 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14501510574018128 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46478125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4046708776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Tor-8B/ce7e8e58-e323-4704-b6f3-7fa6c5c3b7f2.json b/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Tor-8B/ce7e8e58-e323-4704-b6f3-7fa6c5c3b7f2.json deleted file mode 100644 index 007c691daa47e3cec19dc634ca60e544995d6201..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Tor-8B/ce7e8e58-e323-4704-b6f3-7fa6c5c3b7f2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Delta-Vector_Tor-8B/1762652579.555239", - "retrieved_timestamp": "1762652579.55524", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - 
"name": "Delta-Vector/Tor-8B", - "developer": "Delta-Vector", - "inference_platform": "unknown", - "id": "Delta-Vector/Tor-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23815476269631244 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5209108776928992 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40921874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37300531914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.414 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-R1-WolfCore-V1.5-test/3c4058cd-238b-4b01-870d-8693f5ce1b8f.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-R1-WolfCore-V1.5-test/3c4058cd-238b-4b01-870d-8693f5ce1b8f.json deleted file mode 100644 index 0a947bb7691874fe235bcf9d8a2afe35e827532b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-R1-WolfCore-V1.5-test/3c4058cd-238b-4b01-870d-8693f5ce1b8f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_L3-8B-R1-WolfCore-V1.5-test/1762652579.556192", - "retrieved_timestamp": "1762652579.556193", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3955006050612375 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5314954163679548 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3840729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37275598404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-R1-WolfCore/6d8d63c0-ad69-4224-8250-b1664f6abbcf.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-R1-WolfCore/6d8d63c0-ad69-4224-8250-b1664f6abbcf.json deleted file mode 100644 index 6815336b17d5cdbe289b9facf0f1df7624782c10..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-R1-WolfCore/6d8d63c0-ad69-4224-8250-b1664f6abbcf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_L3-8B-R1-WolfCore/1762652579.555949", - "retrieved_timestamp": "1762652579.5559502", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/L3-8B-R1-WolfCore", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/L3-8B-R1-WolfCore" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3775404814780339 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.531794652653343 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16314199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42766666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716755319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-WolfCore/c6771d5c-acaf-4b17-96b4-abf3b75bc68f.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-WolfCore/c6771d5c-acaf-4b17-96b4-abf3b75bc68f.json deleted file mode 100644 index 30ac21a40fece211d1af6c3b206e226251d67ad4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-WolfCore/c6771d5c-acaf-4b17-96b4-abf3b75bc68f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_L3-8B-WolfCore/1762652579.556399", - "retrieved_timestamp": "1762652579.5564", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/L3-8B-WolfCore", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/L3-8B-WolfCore" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4021950646506824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5181980783946081 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39728125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.3705119680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame-test/ef5bb4eb-0875-4cc5-8e27-b59ffbd2e477.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame-test/ef5bb4eb-0875-4cc5-8e27-b59ffbd2e477.json deleted file mode 100644 index cee948dd589b557fd64ebc1e6a3ebbaa1cc5bfd1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame-test/ef5bb4eb-0875-4cc5-8e27-b59ffbd2e477.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-FoxFrame-test/1762652579.556618", - "retrieved_timestamp": "1762652579.556619", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-FoxFrame-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-FoxFrame-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42220308780701876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5456376527271466 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3503158244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame2-test/e46698de-8b2d-4b3c-b482-8cc8a3665eac.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame2-test/e46698de-8b2d-4b3c-b482-8cc8a3665eac.json deleted file mode 100644 index 0d5b8cc2986d70699324927f798b6c55d2db7309..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame2-test/e46698de-8b2d-4b3c-b482-8cc8a3665eac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-FoxFrame2-test/1762652579.556837", - "retrieved_timestamp": "1762652579.5568378", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-FoxFrame2-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-FoxFrame2-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43189514931492884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5484795753806021 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1404833836858006 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4251875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3568816489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame3-test/35351894-ea9d-456b-ab9a-c98686948e6b.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame3-test/35351894-ea9d-456b-ab9a-c98686948e6b.json deleted file mode 100644 index a4af875561b618722454ef9fb63dbabc16021a38..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame3-test/35351894-ea9d-456b-ab9a-c98686948e6b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-FoxFrame3-test/1762652579.557049", - "retrieved_timestamp": "1762652579.5570502", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-FoxFrame3-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-FoxFrame3-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43231957871780213 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5394764281718397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45976041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35289228723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Kakigori/2f19082b-8377-4f63-8c5f-1aa25071a240.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Kakigori/2f19082b-8377-4f63-8c5f-1aa25071a240.json deleted file mode 100644 index 049afa2cc622669abfba973f9ef81358a191f9b7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Kakigori/2f19082b-8377-4f63-8c5f-1aa25071a240.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Kakigori/1762652579.5572648", - "retrieved_timestamp": "1762652579.557266", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Kakigori", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Kakigori" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.359329911302012 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5415529337961275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40521875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3581283244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-2/630c100f-c88d-42a7-9614-bd9a958eab2b.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-2/630c100f-c88d-42a7-9614-bd9a958eab2b.json deleted file mode 100644 index b8b4afe65d8ceb288e8dc45eee8582e7a886b2e1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-2/630c100f-c88d-42a7-9614-bd9a958eab2b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame-Experiment-2/1762652579.5578592", - "retrieved_timestamp": "1762652579.5578601", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-LilithFrame-Experiment-2", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4299469851106176 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4982672766561394 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3804479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32762632978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-3/37292ca7-9e82-4c80-bc6e-bc7e1be7a95e.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-3/37292ca7-9e82-4c80-bc6e-bc7e1be7a95e.json deleted file mode 100644 index bea82e52b33f91536072628d4b7044d3f7ab64c6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-3/37292ca7-9e82-4c80-bc6e-bc7e1be7a95e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame-Experiment-3/1762652579.558079", - "retrieved_timestamp": "1762652579.558079", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-LilithFrame-Experiment-3", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4127858526487498 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5468080647121653 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4038541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3603723404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-4/ecc18f9c-c495-4ae6-8fd8-b2f84fb453ac.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-4/ecc18f9c-c495-4ae6-8fd8-b2f84fb453ac.json deleted file mode 100644 index 4249b365d0d2543cce74f07f1c74cff306774184..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-4/ecc18f9c-c495-4ae6-8fd8-b2f84fb453ac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame-Experiment-4/1762652579.5582879", - "retrieved_timestamp": "1762652579.5582888", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-LilithFrame-Experiment-4", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3981480250180632 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5534370722864824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43706249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648603723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff 
--git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame/3d68e2fb-06cc-43b9-830b-f1cd02f12166.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame/3d68e2fb-06cc-43b9-830b-f1cd02f12166.json deleted file mode 100644 index 88de0d3743e59c9de9ac00411b8e139578fa6bcf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame/3d68e2fb-06cc-43b9-830b-f1cd02f12166.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame/1762652579.557674", - "retrieved_timestamp": "1762652579.5576751", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-LilithFrame", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-LilithFrame" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43604192431636946 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4956125598349656 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32372007978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame/a04a8775-8b4d-4608-9692-47af9f7ed5a7.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame/a04a8775-8b4d-4608-9692-47af9f7ed5a7.json deleted file mode 100644 index 3d42198fe516d706ac40694d629f216274e4c5dc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame/a04a8775-8b4d-4608-9692-47af9f7ed5a7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame/1762652579.557468", - "retrieved_timestamp": "1762652579.557469", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-LilithFrame", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-LilithFrame" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4509545782966972 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4944264226434414 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3895625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3256316489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-GreenSnake/9b9eb072-4120-4a6a-a565-27136e617f10.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-GreenSnake/9b9eb072-4120-4a6a-a565-27136e617f10.json deleted file mode 100644 index f2718c16e9ad6aebf9aae47635f4521873c329c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-GreenSnake/9b9eb072-4120-4a6a-a565-27136e617f10.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-GreenSnake/1762652579.5585039", - "retrieved_timestamp": "1762652579.558505", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-GreenSnake", - "developer": 
"DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-GreenSnake" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47800724300411795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5480509710089697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13897280966767372 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4305833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3651097074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Nocturne/6a21892f-1d11-4c59-8894-8800822b2e72.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Nocturne/6a21892f-1d11-4c59-8894-8800822b2e72.json deleted file mode 100644 index 2dfeac638713f216d4a35ba6b749e63dfea922f7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Nocturne/6a21892f-1d11-4c59-8894-8800822b2e72.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Nocturne/1762652579.558723", - "retrieved_timestamp": "1762652579.5587242", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-Nocturne", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-Nocturne" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3956502081144696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5703329773483826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45690625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36336436170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v2-Experiment/db8eedcc-1dcf-47af-9c2b-a72da97146ca.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v2-Experiment/db8eedcc-1dcf-47af-9c2b-a72da97146ca.json deleted file mode 100644 index 5d3debf648e24a82df4ca8d7a5a7916045d19029..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v2-Experiment/db8eedcc-1dcf-47af-9c2b-a72da97146ca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi-v2-Experiment/1762652579.5591779", - "retrieved_timestamp": "1762652579.559179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2842413684579139 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5322525988273211 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45737500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3423371010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v3-Experiment/8198ab16-4a8b-4da9-8e8a-d1e3beb02839.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v3-Experiment/8198ab16-4a8b-4da9-8e8a-d1e3beb02839.json deleted file mode 100644 index cd4f535d75b92a3a6d9c00dde50e7e46a250c3ee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v3-Experiment/8198ab16-4a8b-4da9-8e8a-d1e3beb02839.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi-v3-Experiment/1762652579.559391", - "retrieved_timestamp": "1762652579.559392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4101628124487471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5437817873983797 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44379166666666664 - } - }, - { 
- "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.339594414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v4-Experiment/e4e71999-6f83-4745-8a9d-66e711e39ac3.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v4-Experiment/e4e71999-6f83-4745-8a9d-66e711e39ac3.json deleted file mode 100644 index 3b8dc74a58b611c19404f3a5c279032bd0a2a098..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v4-Experiment/e4e71999-6f83-4745-8a9d-66e711e39ac3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi-v4-Experiment/1762652579.559606", - "retrieved_timestamp": "1762652579.559606", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4320702402957486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5462502212045214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4449375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3519780585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi/f1bfef73-3586-4f9d-80ca-71b0fb00aadd.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi/f1bfef73-3586-4f9d-80ca-71b0fb00aadd.json deleted file mode 100644 index 2aa943f1db42b1b15ad15293eb6439be51010d8f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi/f1bfef73-3586-4f9d-80ca-71b0fb00aadd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi/1762652579.558937", - "retrieved_timestamp": "1762652579.558938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-Orochi", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-Orochi" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620451513096362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.54977394640115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13595166163141995 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45458333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34466422872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/aa2478d9-59bd-458b-abee-5669aa6280df.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/aa2478d9-59bd-458b-abee-5669aa6280df.json deleted file mode 100644 index 874d0e54c3fee5e2a7c7d62c9d313e2d03a9b989..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/aa2478d9-59bd-458b-abee-5669aa6280df.json +++ 
/dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/1762652579.5600362", - "retrieved_timestamp": "1762652579.5600362", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39090391272933595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48656395204478037 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3789583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31141954787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/66bd7a21-6f85-49b5-bc01-3f52ed8d1c64.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/66bd7a21-6f85-49b5-bc01-3f52ed8d1c64.json deleted file mode 100644 index 80b632ad0bca748f2de4ca209bdb40e9a5e60bcf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/66bd7a21-6f85-49b5-bc01-3f52ed8d1c64.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/1762652579.560246", - "retrieved_timestamp": "1762652579.560246", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31239333856389934 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5126398500939828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33136635638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1a3eefa6-7b3d-4541-93b0-8fe86f6bf038.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1a3eefa6-7b3d-4541-93b0-8fe86f6bf038.json deleted file mode 100644 index bc70dc1d4b40f3cec93db77fe56b0313f1209695..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1a3eefa6-7b3d-4541-93b0-8fe86f6bf038.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1762652579.56046", - "retrieved_timestamp": "1762652579.560461", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { 
- "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4302218114602588 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4811798810475259 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31981382978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/d7303703-f33e-430b-813d-998c95dbdb67.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/d7303703-f33e-430b-813d-998c95dbdb67.json deleted file mode 100644 index 9c4e261190f62843b87ab0aa12243a05886065a8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/d7303703-f33e-430b-813d-998c95dbdb67.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/1762652579.560668", - "retrieved_timestamp": "1762652579.560668", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42405151664250856 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.5184748714407336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40019791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341921542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake/8aa34df4-8347-4f2d-98a0-7ec58bd62e43.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake/8aa34df4-8347-4f2d-98a0-7ec58bd62e43.json deleted file mode 100644 index 4fc07e3479343a0cff8766fb57d4ecc76dc52899..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake/8aa34df4-8347-4f2d-98a0-7ec58bd62e43.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake/1762652579.55982", - "retrieved_timestamp": "1762652579.5598211", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44376033369238066 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5604605871844869 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.456875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3657746010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Unleashed-Twilight/00f0fe96-4a06-46e7-88d8-368b86bcdb06.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Unleashed-Twilight/00f0fe96-4a06-46e7-88d8-368b86bcdb06.json deleted file mode 100644 index fb25db5473451ed62411602ba9faf50092b5ddbf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Unleashed-Twilight/00f0fe96-4a06-46e7-88d8-368b86bcdb06.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Unleashed-Twilight/1762652579.560919", - "retrieved_timestamp": "1762652579.56092", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-Unleashed-Twilight", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-Unleashed-Twilight" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3505121965274361 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5520627163174447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4383958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.3677692819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-WolFrame/3bb96e7a-6c09-4b9e-8f2b-0b525c2ebeb3.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-WolFrame/3bb96e7a-6c09-4b9e-8f2b-0b525c2ebeb3.json deleted file mode 100644 index 76cd34d944e3458ed4a1ca14b2f065c9e080720b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-WolFrame/3bb96e7a-6c09-4b9e-8f2b-0b525c2ebeb3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-WolFrame/1762652579.5611808", - "retrieved_timestamp": "1762652579.561182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MN-12B-WolFrame", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MN-12B-WolFrame" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4397387819873491 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.511681287565329 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40146875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33934507978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-test/e6031abf-1ae2-431c-8247-3124fff41d17.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-test/e6031abf-1ae2-431c-8247-3124fff41d17.json deleted file mode 100644 index c562fc9b2701b9550518b8b0cdb04b6a653abddc..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-test/e6031abf-1ae2-431c-8247-3124fff41d17.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-test/1762652579.5616372", - "retrieved_timestamp": "1762652579.5616379", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MiniusLight-24B-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MiniusLight-24B-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03936776641533354 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6333927323374534 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40925000000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5182014627659575 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1b-test/2917ef74-c8cb-4255-8bda-76280fbe7c64.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1b-test/2917ef74-c8cb-4255-8bda-76280fbe7c64.json deleted file mode 100644 index 7c4010756499bd81818875a5dcec5147abdab087..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1b-test/2917ef74-c8cb-4255-8bda-76280fbe7c64.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-v1b-test/1762652579.561931", - "retrieved_timestamp": "1762652579.561932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - 
}, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MiniusLight-24B-v1b-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MiniusLight-24B-v1b-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37911408396388246 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6617145681113757 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2394259818731118 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4557291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364860372340425 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1c-test/23a21492-0897-44b4-a046-cf93fa8c2a64.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1c-test/23a21492-0897-44b4-a046-cf93fa8c2a64.json deleted file mode 100644 index 2fc6893347505f4eef150ad043c2d4b7440a7b2b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1c-test/23a21492-0897-44b4-a046-cf93fa8c2a64.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-v1c-test/1762652579.562173", - "retrieved_timestamp": "1762652579.5621738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MiniusLight-24B-v1c-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MiniusLight-24B-v1c-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.37858881102142317 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6752681657268389 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46341666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5487034574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1d-test/af67712e-7436-4703-ac22-9878dd8e190a.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1d-test/af67712e-7436-4703-ac22-9878dd8e190a.json deleted file mode 100644 index 9e2cb14290f8c59da4c302c35c1815be6e612bbc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1d-test/af67712e-7436-4703-ac22-9878dd8e190a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-v1d-test/1762652579.5624058", - "retrieved_timestamp": "1762652579.5624058", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MiniusLight-24B-v1d-test", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MiniusLight-24B-v1d-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40324339419407174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6712025325276962 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46208333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5488696808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B/2ec36e2e-0fba-4c6a-b9d0-fe57e7d708ef.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B/2ec36e2e-0fba-4c6a-b9d0-fe57e7d708ef.json deleted file mode 100644 index 3780b3adc0feaeb4e6046946826c1262b211154e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B/2ec36e2e-0fba-4c6a-b9d0-fe57e7d708ef.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B/1762652579.561418", - "retrieved_timestamp": "1762652579.561419", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DoppelReflEx/MiniusLight-24B", - "developer": "DoppelReflEx", - "inference_platform": "unknown", - "id": "DoppelReflEx/MiniusLight-24B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25766410900854175 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6256461050033514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43191666666666667 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5091422872340425 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Again-8B-Model_Stock/cd2de45f-874a-4d63-bb6d-0afe5e687964.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Again-8B-Model_Stock/cd2de45f-874a-4d63-bb6d-0afe5e687964.json deleted file mode 100644 index 945bbd224af8b5c35947455381a3f6ce89b0b579..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Again-8B-Model_Stock/cd2de45f-874a-4d63-bb6d-0afe5e687964.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Again-8B-Model_Stock/1762652579.562616", - "retrieved_timestamp": "1762652579.562617", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Again-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Again-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6724213974476612 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5309801059970912 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39867708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.351811835106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Alita99-8B-LINEAR/570c991f-06bc-45d1-8409-d779a07df9a6.json 
b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Alita99-8B-LINEAR/570c991f-06bc-45d1-8409-d779a07df9a6.json deleted file mode 100644 index 9f47b0d5235076eeb434885fdeb6ead1f8bcf976..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Alita99-8B-LINEAR/570c991f-06bc-45d1-8409-d779a07df9a6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Alita99-8B-LINEAR/1762652579.562879", - "retrieved_timestamp": "1762652579.56288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Alita99-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Alita99-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7190077882241341 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5441767095577089 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42664583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38090093085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_AnotherTest/81ec7c1a-8874-44c3-b482-8a8ecfb2ae72.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_AnotherTest/81ec7c1a-8874-44c3-b482-8a8ecfb2ae72.json deleted file mode 100644 index 0a105c7685c453ea021c2825a593c44b8db149c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_AnotherTest/81ec7c1a-8874-44c3-b482-8a8ecfb2ae72.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_AnotherTest/1762652579.563089", - "retrieved_timestamp": "1762652579.563089", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - 
], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/AnotherTest", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/AnotherTest" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47006387496287627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46834113564549334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42128125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2874833776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire-8B-model_stock/28bd44a9-d916-4a0b-b0ae-c6a4cb5d727d.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire-8B-model_stock/28bd44a9-d916-4a0b-b0ae-c6a4cb5d727d.json deleted file mode 100644 index fce22b64236780a53835c758e78beb9c97316242..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire-8B-model_stock/28bd44a9-d916-4a0b-b0ae-c6a4cb5d727d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire-8B-model_stock/1762652579.5633001", - "retrieved_timestamp": "1762652579.563301", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Aspire-8B-model_stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aspire-8B-model_stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, 
- "max_score": 1 - }, - "score_details": { - "score": 0.7140620221013578 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5278251846388996 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14954682779456194 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42124999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37632978723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_1.3-8B_model-stock/917a9361-af08-4e12-a93a-01321629b31f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_1.3-8B_model-stock/917a9361-af08-4e12-a93a-01321629b31f.json deleted file mode 100644 index fb1ba83656cab470ba9deecb728a192dba4710dc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_1.3-8B_model-stock/917a9361-af08-4e12-a93a-01321629b31f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_1.3-8B_model-stock/1762652579.563606", - "retrieved_timestamp": "1762652579.563607", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Aspire_1.3-8B_model-stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aspire_1.3-8B_model-stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7061685217445268 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5301644606574212 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.1691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4104583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37159242021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2-8B-Model_Stock/677221cd-f218-4982-8363-d969913d7a22.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2-8B-Model_Stock/677221cd-f218-4982-8363-d969913d7a22.json deleted file mode 100644 index d0a25b481c292f4e780742776357a7e9e6829400..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2-8B-Model_Stock/677221cd-f218-4982-8363-d969913d7a22.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2-8B-Model_Stock/1762652579.56384", - "retrieved_timestamp": "1762652579.563841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Aspire_V2-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aspire_V2-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7371430027881576 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5329650089428358 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38937499999999997 - } 
- }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3696808510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2.1-8B-Model_Stock/292e77cb-e6e6-4d10-9956-1e09369e9669.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2.1-8B-Model_Stock/292e77cb-e6e6-4d10-9956-1e09369e9669.json deleted file mode 100644 index d34c6c4775aca9fecf4bae4713e63147298c9601..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2.1-8B-Model_Stock/292e77cb-e6e6-4d10-9956-1e09369e9669.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2.1-8B-Model_Stock/1762652579.564126", - "retrieved_timestamp": "1762652579.564127", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Aspire_V2.1-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aspire_V2.1-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7237540836092679 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5236395810818485 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41359375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800698138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2_ALT-8B-Model_Stock/62414bde-98c1-4cae-af6d-18d3b0ecd50a.json 
b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2_ALT-8B-Model_Stock/62414bde-98c1-4cae-af6d-18d3b0ecd50a.json deleted file mode 100644 index 994d8bc3d1005c7c1499498109e0a346e9f4dbc4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2_ALT-8B-Model_Stock/62414bde-98c1-4cae-af6d-18d3b0ecd50a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2_ALT-8B-Model_Stock/1762652579.5643399", - "retrieved_timestamp": "1762652579.564341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Aspire_V2_ALT-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aspire_V2_ALT-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7381170848903134 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5265819478728287 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39749999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3726728723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2_ALT_ROW-8B-Model_Stock/3258c5c6-d12d-4e09-8404-22b6aaf82e87.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2_ALT_ROW-8B-Model_Stock/3258c5c6-d12d-4e09-8404-22b6aaf82e87.json deleted file mode 100644 index c85eb1266fa02ac44db15e54c89e4511bfdb0fc6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2_ALT_ROW-8B-Model_Stock/3258c5c6-d12d-4e09-8404-22b6aaf82e87.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2_ALT_ROW-8B-Model_Stock/1762652579.564561", - 
"retrieved_timestamp": "1762652579.5645618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7381170848903134 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5265819478728287 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39749999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3726728723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V3-8B-Model_Stock/3cc8c02f-87a8-428a-8991-a0d52500d927.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V3-8B-Model_Stock/3cc8c02f-87a8-428a-8991-a0d52500d927.json deleted file mode 100644 index d37945534ef9bf1126d4a389a15562304a96eaf0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V3-8B-Model_Stock/3cc8c02f-87a8-428a-8991-a0d52500d927.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V3-8B-Model_Stock/1762652579.5648441", - "retrieved_timestamp": "1762652579.564845", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Aspire_V3-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": 
"DreadPoor/Aspire_V3-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5118795905973927 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5267958758971987 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40149999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36419547872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V4-8B-Model_Stock/692e0ff5-0607-4aae-8996-45bbbc4d2288.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V4-8B-Model_Stock/692e0ff5-0607-4aae-8996-45bbbc4d2288.json deleted file mode 100644 index 7252a1a00643d12772e03cd31bd442c5039c546f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V4-8B-Model_Stock/692e0ff5-0607-4aae-8996-45bbbc4d2288.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V4-8B-Model_Stock/1762652579.565063", - "retrieved_timestamp": "1762652579.565064", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Aspire_V4-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aspire_V4-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.769416259967996 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.5314037161536506 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3867395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.370844414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V4_ALT-8B-Model_Stock/7b634b21-8d89-4656-89d7-3590fc8a883a.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V4_ALT-8B-Model_Stock/7b634b21-8d89-4656-89d7-3590fc8a883a.json deleted file mode 100644 index 206d588e5e5cc71ed60ceb66d41045fcac3aedf8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V4_ALT-8B-Model_Stock/7b634b21-8d89-4656-89d7-3590fc8a883a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V4_ALT-8B-Model_Stock/1762652579.565274", - "retrieved_timestamp": "1762652579.565275", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Aspire_V4_ALT-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aspire_V4_ALT-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7365933500888753 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5268232518944024 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18126888217522658 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3920416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681848404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Asymmetric_Linearity-8B-Model_Stock/ad58e69a-0917-4375-9e83-5db2ad50d0ca.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Asymmetric_Linearity-8B-Model_Stock/ad58e69a-0917-4375-9e83-5db2ad50d0ca.json deleted file mode 100644 index e50673e7015969f2855448c6d8a928f725265b89..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Asymmetric_Linearity-8B-Model_Stock/ad58e69a-0917-4375-9e83-5db2ad50d0ca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Asymmetric_Linearity-8B-Model_Stock/1762652579.5654871", - "retrieved_timestamp": "1762652579.565488", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Asymmetric_Linearity-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Asymmetric_Linearity-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7174341857382855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.546535755155883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41994791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3843916223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LINEAR/c8b72a17-837a-45ed-b285-bf472a4f6d45.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LINEAR/c8b72a17-837a-45ed-b285-bf472a4f6d45.json deleted file mode 100644 index cc9f005454fab20cba11b030029be8ae1512d6a6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LINEAR/c8b72a17-837a-45ed-b285-bf472a4f6d45.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aurora_faustus-8B-LINEAR/1762652579.565701", - "retrieved_timestamp": "1762652579.565702", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Aurora_faustus-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aurora_faustus-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7281003293483512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5515538279425277 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4145833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842253989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LORABLATED/05707286-d03b-4cb2-9a0f-48245c867cc7.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LORABLATED/05707286-d03b-4cb2-9a0f-48245c867cc7.json deleted file mode 100644 index 
55aea1d9b182b1659e5f016333a34e564804b96b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LORABLATED/05707286-d03b-4cb2-9a0f-48245c867cc7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aurora_faustus-8B-LORABLATED/1762652579.565921", - "retrieved_timestamp": "1762652579.565921", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Aurora_faustus-8B-LORABLATED", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aurora_faustus-8B-LORABLATED" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527050448365891 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539159616655651 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1487915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42385416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36727061170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LORABLATED_ALT/2b644863-f52f-487a-85d1-3fc3ce973d90.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LORABLATED_ALT/2b644863-f52f-487a-85d1-3fc3ce973d90.json deleted file mode 100644 index 0285f5489b64e254d9e56b0db6e208ee6577541d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LORABLATED_ALT/2b644863-f52f-487a-85d1-3fc3ce973d90.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Aurora_faustus-8B-LORABLATED_ALT/1762652579.566129", - "retrieved_timestamp": "1762652579.56613", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Aurora_faustus-8B-LORABLATED_ALT", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Aurora_faustus-8B-LORABLATED_ALT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7377923908562614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5387670721191214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15861027190332327 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4225208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36943151595744683 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Autumn_Dawn-8B-LINEAR/4f1d1b68-311f-4409-bf5b-41629a889da3.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Autumn_Dawn-8B-LINEAR/4f1d1b68-311f-4409-bf5b-41629a889da3.json deleted file mode 100644 index a6e629a23db71c42e1c97c80a3431344e25282dc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Autumn_Dawn-8B-LINEAR/4f1d1b68-311f-4409-bf5b-41629a889da3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Autumn_Dawn-8B-LINEAR/1762652579.566346", - "retrieved_timestamp": "1762652579.5663471", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Autumn_Dawn-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Autumn_Dawn-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7292993701157373 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5459436958014627 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39677526595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel-8B-LINEAR/f3af4295-9508-4a3e-ba5a-6336a560fd6c.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel-8B-LINEAR/f3af4295-9508-4a3e-ba5a-6336a560fd6c.json deleted file mode 100644 index dfad81f64921d207f4619ce75cbec9a7d1eea62c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel-8B-LINEAR/f3af4295-9508-4a3e-ba5a-6336a560fd6c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel-8B-LINEAR/1762652579.56655", - "retrieved_timestamp": "1762652579.566551", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/BaeZel-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/BaeZel-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7377923908562614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5463800554321383 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.18126888217522658 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4227083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3861369680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel-8B-Model_Stock/31395ff6-82da-4585-85d6-459fcac9408f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel-8B-Model_Stock/31395ff6-82da-4585-85d6-459fcac9408f.json deleted file mode 100644 index a8f953dd262e9950c4503b059afc7df8e1d502b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel-8B-Model_Stock/31395ff6-82da-4585-85d6-459fcac9408f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel-8B-Model_Stock/1762652579.566763", - "retrieved_timestamp": "1762652579.566764", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/BaeZel-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/BaeZel-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7713145564878965 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5407680550216925 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41991666666666666 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38804853723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V2-8B-Model_Stock/cdacd0e9-fa22-4053-b16d-d3bac8541829.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V2-8B-Model_Stock/cdacd0e9-fa22-4053-b16d-d3bac8541829.json deleted file mode 100644 index 244bf1daf99dfc1834d11222299297776f92277b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V2-8B-Model_Stock/cdacd0e9-fa22-4053-b16d-d3bac8541829.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel_V2-8B-Model_Stock/1762652579.566977", - "retrieved_timestamp": "1762652579.566978", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/BaeZel_V2-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/BaeZel_V2-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7676675665013276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5373871612758611 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3946974734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V2_ALT-8B-Model_Stock/08ac7c80-0f13-43c9-a538-683eb6927b59.json 
b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V2_ALT-8B-Model_Stock/08ac7c80-0f13-43c9-a538-683eb6927b59.json deleted file mode 100644 index 8afe613870ef34ecf6b2c81d225966c5de267485..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V2_ALT-8B-Model_Stock/08ac7c80-0f13-43c9-a538-683eb6927b59.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel_V2_ALT-8B-Model_Stock/1762652579.567195", - "retrieved_timestamp": "1762652579.567196", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/BaeZel_V2_ALT-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/BaeZel_V2_ALT-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7676675665013276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5373871612758611 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3946974734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V3-8B-Model_Stock/91ec0c61-73ca-463f-b3be-3386293e4fc0.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V3-8B-Model_Stock/91ec0c61-73ca-463f-b3be-3386293e4fc0.json deleted file mode 100644 index 6a6fea4daf7983838a44c6f4bf3318f8ffaa23ab..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V3-8B-Model_Stock/91ec0c61-73ca-463f-b3be-3386293e4fc0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel_V3-8B-Model_Stock/1762652579.5674188", - "retrieved_timestamp": "1762652579.56742", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/BaeZel_V3-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/BaeZel_V3-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7831797408653485 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539231076759135 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18957703927492447 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41743749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3887965425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Blunt_Edge-8B-SLERP/35807c64-beed-4022-a4ba-1284c5f6124f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Blunt_Edge-8B-SLERP/35807c64-beed-4022-a4ba-1284c5f6124f.json deleted file mode 100644 index 5cde93855c1b9d443b7f2729667b789987713f91..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Blunt_Edge-8B-SLERP/35807c64-beed-4022-a4ba-1284c5f6124f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Blunt_Edge-8B-SLERP/1762652579.567633", - "retrieved_timestamp": "1762652579.5676339", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Blunt_Edge-8B-SLERP", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Blunt_Edge-8B-SLERP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": 
{ - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7496575752337131 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5389470863694941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37666223404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BulkUp/3c2e7750-3257-4012-8b43-44387707170c.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BulkUp/3c2e7750-3257-4012-8b43-44387707170c.json deleted file mode 100644 index 1ae3a6f226ec127ae39cc5cf40ff964807cfaacc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BulkUp/3c2e7750-3257-4012-8b43-44387707170c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_BulkUp/1762652579.567868", - "retrieved_timestamp": "1762652579.567869", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/BulkUp", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/BulkUp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.177804891022487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28698602947692575 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3446666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11095412234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Cadence-8B-LINEAR/8be55d6b-7fe0-41cf-86a6-66327dd88003.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Cadence-8B-LINEAR/8be55d6b-7fe0-41cf-86a6-66327dd88003.json deleted file mode 100644 index 09b67b6c604700e0436efd1b7b386db07972a218..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Cadence-8B-LINEAR/8be55d6b-7fe0-41cf-86a6-66327dd88003.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Cadence-8B-LINEAR/1762652579.568077", - "retrieved_timestamp": "1762652579.568078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Cadence-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Cadence-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7682172192006099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5433358555450108 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16767371601208458 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41734374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3803191489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Caelid-8B-Model_Stock/8b15f9a3-6f39-4210-b48f-4dc5569114e2.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Caelid-8B-Model_Stock/8b15f9a3-6f39-4210-b48f-4dc5569114e2.json deleted file mode 100644 index ebb4c22fffee1f902e3e711af99e4a59c53bdab5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Caelid-8B-Model_Stock/8b15f9a3-6f39-4210-b48f-4dc5569114e2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Caelid-8B-Model_Stock/1762652579.5682912", - "retrieved_timestamp": "1762652579.5682921", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Caelid-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Caelid-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7247281657114235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5459605196913864 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1510574018126888 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4001041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3816489361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Casuar-9B-Model_Stock/7c5c8fd8-2fbb-41f3-88f3-92a544200204.json 
b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Casuar-9B-Model_Stock/7c5c8fd8-2fbb-41f3-88f3-92a544200204.json deleted file mode 100644 index 6e6613787a784fa12c74f42a23401cf88fe3738e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Casuar-9B-Model_Stock/7c5c8fd8-2fbb-41f3-88f3-92a544200204.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Casuar-9B-Model_Stock/1762652579.5685189", - "retrieved_timestamp": "1762652579.5685189", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Casuar-9B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Casuar-9B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7764852812759035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6106681877306871 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41654166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4156416223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Condensed_Milk-8B-Model_Stock/58573d8e-602a-4088-8dec-a738b7e55e9c.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Condensed_Milk-8B-Model_Stock/58573d8e-602a-4088-8dec-a738b7e55e9c.json deleted file mode 100644 index a17811bfaf8e089fe16f481fabc8bcd12ef527b6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Condensed_Milk-8B-Model_Stock/58573d8e-602a-4088-8dec-a738b7e55e9c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Condensed_Milk-8B-Model_Stock/1762652579.568758", - "retrieved_timestamp": "1762652579.568759", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Condensed_Milk-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Condensed_Milk-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7536292592543341 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5434864122121906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17447129909365558 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41601041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38763297872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_CoolerCoder-8B-LINEAR/b3bc4e42-5850-45bd-a0a1-ff6779c04fce.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_CoolerCoder-8B-LINEAR/b3bc4e42-5850-45bd-a0a1-ff6779c04fce.json deleted file mode 100644 index 3652f21000ac6d2db2a87e7b8ca64f1dd3115938..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_CoolerCoder-8B-LINEAR/b3bc4e42-5850-45bd-a0a1-ff6779c04fce.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_CoolerCoder-8B-LINEAR/1762652579.568993", - "retrieved_timestamp": "1762652579.568993", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/CoolerCoder-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/CoolerCoder-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4519286603988528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4761504835496542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3963541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31590757978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Damasteel-8B-LINEAR/b0a2ef10-8705-4eae-892d-51f3633dcd87.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Damasteel-8B-LINEAR/b0a2ef10-8705-4eae-892d-51f3633dcd87.json deleted file mode 100644 index 215188c0e60aca4003506e8aa69325674de5e27d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Damasteel-8B-LINEAR/b0a2ef10-8705-4eae-892d-51f3633dcd87.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Damasteel-8B-LINEAR/1762652579.569221", - "retrieved_timestamp": "1762652579.569222", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Damasteel-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Damasteel-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7384417789243651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5388142176959776 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42124999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3779089095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Dearly_Beloved-8B-TIES/3d46ee0f-8ec0-4723-ac8d-fe88db7053c1.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Dearly_Beloved-8B-TIES/3d46ee0f-8ec0-4723-ac8d-fe88db7053c1.json deleted file mode 100644 index 6450f21ef59015c45ea7b3faadeec19f2206da11..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Dearly_Beloved-8B-TIES/3d46ee0f-8ec0-4723-ac8d-fe88db7053c1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Dearly_Beloved-8B-TIES/1762652579.569437", - "retrieved_timestamp": "1762652579.569438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Dearly_Beloved-8B-TIES", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Dearly_Beloved-8B-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8266687943545348 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4049833102731906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2826628989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Decayed-8B-LINEAR/5658866d-fd86-4203-b14f-84f9a4784028.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Decayed-8B-LINEAR/5658866d-fd86-4203-b14f-84f9a4784028.json deleted file mode 100644 index d6f6a4b479fc1fe28a594e1fa6775a3b473f16e2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Decayed-8B-LINEAR/5658866d-fd86-4203-b14f-84f9a4784028.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Decayed-8B-LINEAR/1762652579.569654", - "retrieved_timestamp": "1762652579.569655", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Decayed-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Decayed-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7676176988169169 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5417014088773181 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1714501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37632978723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative-8B-Model_Stock/9ef7e716-8638-46ac-a455-f601c1cfddc1.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative-8B-Model_Stock/9ef7e716-8638-46ac-a455-f601c1cfddc1.json deleted file mode 100644 index cc763e3c073def93d4bde1b0f33b58b4a4333849..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative-8B-Model_Stock/9ef7e716-8638-46ac-a455-f601c1cfddc1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative-8B-Model_Stock/1762652579.569859", - "retrieved_timestamp": "1762652579.56986", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Derivative-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Derivative-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7667433520835827 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5395493987763994 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17900302114803626 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42004166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3810671542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V2-8B-Model_Stock/3320dceb-b5ef-4267-81d3-b6fe2a415eee.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V2-8B-Model_Stock/3320dceb-b5ef-4267-81d3-b6fe2a415eee.json deleted file mode 100644 index 966a070c0bd1c366972c37acb4295428a162764b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V2-8B-Model_Stock/3320dceb-b5ef-4267-81d3-b6fe2a415eee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/DreadPoor_Derivative_V2-8B-Model_Stock/1762652579.5701172", - "retrieved_timestamp": "1762652579.570118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Derivative_V2-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Derivative_V2-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7536791269387447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5392643954415269 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41229166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38563829787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V2_ALT-8B-Model_Stock/ac19b0a8-1955-4bab-b7ae-451a84dc09c6.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V2_ALT-8B-Model_Stock/ac19b0a8-1955-4bab-b7ae-451a84dc09c6.json deleted file mode 100644 index 8dc10f445311a113079946c3710e60bb0ee95694..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V2_ALT-8B-Model_Stock/ac19b0a8-1955-4bab-b7ae-451a84dc09c6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative_V2_ALT-8B-Model_Stock/1762652579.570343", - "retrieved_timestamp": "1762652579.570344", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"DreadPoor/Derivative_V2_ALT-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Derivative_V2_ALT-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7719639445560003 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5365351570462934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18806646525679757 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41346875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38821476063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V3-8B-Model_Stock/54f51897-7b47-4e95-9c1a-58ecd64caa96.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V3-8B-Model_Stock/54f51897-7b47-4e95-9c1a-58ecd64caa96.json deleted file mode 100644 index af833e3518787ba9b813d33de8a7e87766bf0a37..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V3-8B-Model_Stock/54f51897-7b47-4e95-9c1a-58ecd64caa96.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative_V3-8B-Model_Stock/1762652579.570688", - "retrieved_timestamp": "1762652579.570689", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Derivative_V3-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Derivative_V3-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6963767248677952 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.524319745545524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4149895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35023271276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Elusive_Dragon_Heart-8B-LINEAR/fbc53f61-cb3b-4f85-a724-fc07c6912c22.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Elusive_Dragon_Heart-8B-LINEAR/fbc53f61-cb3b-4f85-a724-fc07c6912c22.json deleted file mode 100644 index 81bbba4e2171090414912476109cda645c407a22..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Elusive_Dragon_Heart-8B-LINEAR/fbc53f61-cb3b-4f85-a724-fc07c6912c22.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Elusive_Dragon_Heart-8B-LINEAR/1762652579.570945", - "retrieved_timestamp": "1762652579.570946", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Elusive_Dragon_Heart-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Elusive_Dragon_Heart-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7131378076836128 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5456414280881592 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4145520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3813996010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Emu_Eggs-9B-Model_Stock/9343177e-5432-47c7-9fb6-90f2dc9125e5.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Emu_Eggs-9B-Model_Stock/9343177e-5432-47c7-9fb6-90f2dc9125e5.json deleted file mode 100644 index 124f61de381eedf6d9839a1caee59bdf407c41b7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Emu_Eggs-9B-Model_Stock/9343177e-5432-47c7-9fb6-90f2dc9125e5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Emu_Eggs-9B-Model_Stock/1762652579.571181", - "retrieved_timestamp": "1762652579.571182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Emu_Eggs-9B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Emu_Eggs-9B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7606982805622415 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6051657213517168 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20996978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4227061170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Eunoia_Vespera-8B-LINEAR/5a835cef-3db8-40c9-8ae3-022d0719c89e.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Eunoia_Vespera-8B-LINEAR/5a835cef-3db8-40c9-8ae3-022d0719c89e.json deleted file mode 100644 index 6f1ba7362d97b44e1033940ecca75cc22b2f671a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Eunoia_Vespera-8B-LINEAR/5a835cef-3db8-40c9-8ae3-022d0719c89e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Eunoia_Vespera-8B-LINEAR/1762652579.571407", - "retrieved_timestamp": "1762652579.571407", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Eunoia_Vespera-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Eunoia_Vespera-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7235291249440374 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399310621081937 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38389295212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Fu_sion_HA-8B-SLERP/5d6eb91b-518c-41ae-9e52-bb741b005601.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Fu_sion_HA-8B-SLERP/5d6eb91b-518c-41ae-9e52-bb741b005601.json deleted file mode 100644 index 
d5130f4c9265bb6bc0ee9b86f4a6941dc919c1f7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Fu_sion_HA-8B-SLERP/5d6eb91b-518c-41ae-9e52-bb741b005601.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Fu_sion_HA-8B-SLERP/1762652579.57162", - "retrieved_timestamp": "1762652579.5716212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Fu_sion_HA-8B-SLERP", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Fu_sion_HA-8B-SLERP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7609232392274721 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5372804197028272 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17522658610271905 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41601041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38248005319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_HOT_STINKING_GARBAGE/70471d77-adb1-49df-ab72-8f43f379ab23.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_HOT_STINKING_GARBAGE/70471d77-adb1-49df-ab72-8f43f379ab23.json deleted file mode 100644 index 9e69daa4ead02f79bcb1a84a7e6ea466e4a16ea4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_HOT_STINKING_GARBAGE/70471d77-adb1-49df-ab72-8f43f379ab23.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_HOT_STINKING_GARBAGE/1762652579.571834", - "retrieved_timestamp": "1762652579.5718348", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/HOT_STINKING_GARBAGE", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/HOT_STINKING_GARBAGE" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5754265349273262 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4884000866161456 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42500000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30169547872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_H_the_eighth-8B-LINEAR/2bbec710-ce13-4fa3-861b-fce8eee26b3b.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_H_the_eighth-8B-LINEAR/2bbec710-ce13-4fa3-861b-fce8eee26b3b.json deleted file mode 100644 index 5502a033d5fef229b52c9f6e48263d67b18d9b6d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_H_the_eighth-8B-LINEAR/2bbec710-ce13-4fa3-861b-fce8eee26b3b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_H_the_eighth-8B-LINEAR/1762652579.572039", - "retrieved_timestamp": "1762652579.5720398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/H_the_eighth-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/H_the_eighth-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.7469347996648892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383752114303682 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3823969414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Happy_New_Year-8B-Model_Stock/170808e4-7506-44c9-8bb7-5dd92037a347.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Happy_New_Year-8B-Model_Stock/170808e4-7506-44c9-8bb7-5dd92037a347.json deleted file mode 100644 index 3c8b5302c6bc9b129341d34331ed0e29bef46142..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Happy_New_Year-8B-Model_Stock/170808e4-7506-44c9-8bb7-5dd92037a347.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Happy_New_Year-8B-Model_Stock/1762652579.572258", - "retrieved_timestamp": "1762652579.5722592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Happy_New_Year-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Happy_New_Year-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7615726272955757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367913866457493 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.1593655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3878823138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Heart_Stolen-8B-Model_Stock/86b9c040-4c5e-413d-ac23-1603c499b5de.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Heart_Stolen-8B-Model_Stock/86b9c040-4c5e-413d-ac23-1603c499b5de.json deleted file mode 100644 index 37464c94bf2ad5347c9ec3be55b16edd689cc7e9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Heart_Stolen-8B-Model_Stock/86b9c040-4c5e-413d-ac23-1603c499b5de.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Heart_Stolen-8B-Model_Stock/1762652579.572714", - "retrieved_timestamp": "1762652579.5727181", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Heart_Stolen-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Heart_Stolen-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7244533393617822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5395443745186658 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17220543806646527 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41622916666666665 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37940492021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Heart_Stolen-ALT-8B-Model_Stock/141d8908-50cb-4457-a0f0-93d55d1c705b.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Heart_Stolen-ALT-8B-Model_Stock/141d8908-50cb-4457-a0f0-93d55d1c705b.json deleted file mode 100644 index 176bb423bff6662899c2bec0b71692f1331df8b2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Heart_Stolen-ALT-8B-Model_Stock/141d8908-50cb-4457-a0f0-93d55d1c705b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Heart_Stolen-ALT-8B-Model_Stock/1762652579.573096", - "retrieved_timestamp": "1762652579.573097", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Heart_Stolen-ALT-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Heart_Stolen-ALT-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7183584001560305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.526338467747489 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40549999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37724401595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Here_We_Go_Again-8B-SLERP/1c21cfd2-2b01-44d3-8daa-41493a743a75.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Here_We_Go_Again-8B-SLERP/1c21cfd2-2b01-44d3-8daa-41493a743a75.json deleted file mode 100644 index d9c59156f431c60a1e6d8703a385d29b06475cfd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Here_We_Go_Again-8B-SLERP/1c21cfd2-2b01-44d3-8daa-41493a743a75.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Here_We_Go_Again-8B-SLERP/1762652579.573366", - "retrieved_timestamp": "1762652579.573367", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Here_We_Go_Again-8B-SLERP", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Here_We_Go_Again-8B-SLERP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7442120240960651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5460182474181831 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873005319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Howdy-8B-LINEAR/88df4a25-089c-4f21-b403-a1f5dad112b3.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Howdy-8B-LINEAR/88df4a25-089c-4f21-b403-a1f5dad112b3.json deleted file mode 100644 index e2c569e5d8572f91ff78109ffc43964788f155fd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Howdy-8B-LINEAR/88df4a25-089c-4f21-b403-a1f5dad112b3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/DreadPoor_Howdy-8B-LINEAR/1762652579.573699", - "retrieved_timestamp": "1762652579.5737002", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Howdy-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Howdy-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7377923908562614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383981582614435 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41213541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806515957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Incidental-8B-Model_Stock/102ed90e-cbe3-4219-b9c6-cec82c78941f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Incidental-8B-Model_Stock/102ed90e-cbe3-4219-b9c6-cec82c78941f.json deleted file mode 100644 index 9e62e6efdfddd9ec906f63670b6082be1de75bac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Incidental-8B-Model_Stock/102ed90e-cbe3-4219-b9c6-cec82c78941f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Incidental-8B-Model_Stock/1762652579.573979", - "retrieved_timestamp": "1762652579.5739799", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Incidental-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - 
"id": "DreadPoor/Incidental-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.748183708116686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5452070612873019 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16163141993957703 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42401041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873005319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Irina-8B-model_stock/60aebc6f-b3ee-4b32-8b89-4359c990fb23.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Irina-8B-model_stock/60aebc6f-b3ee-4b32-8b89-4359c990fb23.json deleted file mode 100644 index cf7e83e00701459b1e1aa24bb298de28edaf4cf1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Irina-8B-model_stock/60aebc6f-b3ee-4b32-8b89-4359c990fb23.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Irina-8B-model_stock/1762652579.574285", - "retrieved_timestamp": "1762652579.574286", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Irina-8B-model_stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Irina-8B-model_stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6799403360860294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5236638956084764 - } - }, - 
{ - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40029166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35738031914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Kindling-8B-Model_Stock/8ee9ad54-c6ca-4afc-931b-ffe1fd1d5971.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Kindling-8B-Model_Stock/8ee9ad54-c6ca-4afc-931b-ffe1fd1d5971.json deleted file mode 100644 index ed0a794aeac5eee5b71dacf668352a916b0080c3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Kindling-8B-Model_Stock/8ee9ad54-c6ca-4afc-931b-ffe1fd1d5971.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Kindling-8B-Model_Stock/1762652579.57468", - "retrieved_timestamp": "1762652579.574682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Kindling-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Kindling-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7308231049171753 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5492054832931256 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17522658610271905 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4068333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3829787234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_L3.1-BaeZel-8B-Della/6c7dfbaf-648e-4c4a-907f-8639ab1c7312.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_L3.1-BaeZel-8B-Della/6c7dfbaf-648e-4c4a-907f-8639ab1c7312.json deleted file mode 100644 index fffb2b7d12ec220b8c0034cf159a96721c640cd9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_L3.1-BaeZel-8B-Della/6c7dfbaf-648e-4c4a-907f-8639ab1c7312.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_L3.1-BaeZel-8B-Della/1762652579.575009", - "retrieved_timestamp": "1762652579.57501", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/L3.1-BaeZel-8B-Della", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/L3.1-BaeZel-8B-Della" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5180243974875552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5448449542185521 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17447129909365558 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4199791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3902094414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Laughing_Stock-8B-Model_Stock/cf1b2ab2-d18b-44c1-b0ed-476dba32c034.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Laughing_Stock-8B-Model_Stock/cf1b2ab2-d18b-44c1-b0ed-476dba32c034.json deleted file mode 100644 index 0255c1efbd6bbdef31ae1c15315240320de07aa7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Laughing_Stock-8B-Model_Stock/cf1b2ab2-d18b-44c1-b0ed-476dba32c034.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Laughing_Stock-8B-Model_Stock/1762652579.5752351", - "retrieved_timestamp": "1762652579.575236", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Laughing_Stock-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Laughing_Stock-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7189579205397235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5449429262155 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1578549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4145520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3764128989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Lava_Lamp-8B-SLERP/26d89e91-7f52-4913-a4e0-3275cca1d8d7.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Lava_Lamp-8B-SLERP/26d89e91-7f52-4913-a4e0-3275cca1d8d7.json deleted file mode 100644 index 72514129bd0ff3226d89d5277622bef9a39b06fa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Lava_Lamp-8B-SLERP/26d89e91-7f52-4913-a4e0-3275cca1d8d7.json +++ /dev/null @@ -1,107 
+0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Lava_Lamp-8B-SLERP/1762652579.575455", - "retrieved_timestamp": "1762652579.575455", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Lava_Lamp-8B-SLERP", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Lava_Lamp-8B-SLERP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7381170848903134 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367586873360172 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17371601208459214 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_LemonP-8B-Model_Stock/f13fb9a9-f53c-4c7e-9e29-fabb010a617b.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_LemonP-8B-Model_Stock/f13fb9a9-f53c-4c7e-9e29-fabb010a617b.json deleted file mode 100644 index 4c72d7b2a6a6ff09308fe73e6800a8365c023057..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_LemonP-8B-Model_Stock/f13fb9a9-f53c-4c7e-9e29-fabb010a617b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_LemonP-8B-Model_Stock/1762652579.575685", - "retrieved_timestamp": "1762652579.575686", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/LemonP-8B-Model_Stock", - "developer": "DreadPoor", - 
"inference_platform": "unknown", - "id": "DreadPoor/LemonP-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7676176988169169 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5439348074265458 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40810416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40043218085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Lydia_of_Whiterun-8B-LINEAR/cee29aba-b6c1-42a2-88d0-a92080b3c083.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Lydia_of_Whiterun-8B-LINEAR/cee29aba-b6c1-42a2-88d0-a92080b3c083.json deleted file mode 100644 index 0105a8bb29179e9adc17e8b3aaa52512e1164dd0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Lydia_of_Whiterun-8B-LINEAR/cee29aba-b6c1-42a2-88d0-a92080b3c083.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Lydia_of_Whiterun-8B-LINEAR/1762652579.575901", - "retrieved_timestamp": "1762652579.575901", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Lydia_of_Whiterun-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Lydia_of_Whiterun-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.760323718843779 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5379527944750039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42506249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800698138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Matryoshka-8B-LINEAR/2f8ce822-9278-49e5-878a-69439e794623.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Matryoshka-8B-LINEAR/2f8ce822-9278-49e5-878a-69439e794623.json deleted file mode 100644 index 202fb4f1ef78ffc65bb27e2ed79dc7a02a00e5c3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Matryoshka-8B-LINEAR/2f8ce822-9278-49e5-878a-69439e794623.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Matryoshka-8B-LINEAR/1762652579.576119", - "retrieved_timestamp": "1762652579.5761201", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Matryoshka-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Matryoshka-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7262519005128614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5444280006376178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17522658610271905 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42524999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3865525265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Mercury_In_Retrograde-8b-Model-Stock/eff11f37-ec26-4866-8109-0ee6dcac7fec.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Mercury_In_Retrograde-8b-Model-Stock/eff11f37-ec26-4866-8109-0ee6dcac7fec.json deleted file mode 100644 index 7605ab3318d32341603f1bf7aea8cb3d02de0f3a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Mercury_In_Retrograde-8b-Model-Stock/eff11f37-ec26-4866-8109-0ee6dcac7fec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Mercury_In_Retrograde-8b-Model-Stock/1762652579.576331", - "retrieved_timestamp": "1762652579.576332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Mercury_In_Retrograde-8b-Model-Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Mercury_In_Retrograde-8b-Model-Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7296240641497892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5390507664719518 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4198854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.38289561170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy-8B-Model_Stock/394ac507-8bdb-4d06-bf6e-87911443ec2b.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy-8B-Model_Stock/394ac507-8bdb-4d06-bf6e-87911443ec2b.json deleted file mode 100644 index 6eee39abf7eae6431aa05f84c97affbf5c45ba67..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy-8B-Model_Stock/394ac507-8bdb-4d06-bf6e-87911443ec2b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Minthy-8B-Model_Stock/1762652579.5765939", - "retrieved_timestamp": "1762652579.5765948", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Minthy-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Minthy-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.765769269981427 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5352951319641014 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40940624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3992686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy_ALT-8B-Model_Stock/709e429f-0a98-4ae6-b10f-f0546ef2d9b5.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy_ALT-8B-Model_Stock/709e429f-0a98-4ae6-b10f-f0546ef2d9b5.json deleted file mode 100644 index d16b0f35c4cc2c21a0193ab15d8c8daf8c297359..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy_ALT-8B-Model_Stock/709e429f-0a98-4ae6-b10f-f0546ef2d9b5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Minthy_ALT-8B-Model_Stock/1762652579.57681", - "retrieved_timestamp": "1762652579.576811", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Minthy_ALT-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Minthy_ALT-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6991992358054406 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5374800202589046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4225208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3673537234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy_V2-8B-Model_Stock/3f8011c6-6826-4788-b848-ec6938eefa7f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy_V2-8B-Model_Stock/3f8011c6-6826-4788-b848-ec6938eefa7f.json deleted file mode 100644 index bd82fdf4610d9f7fb832edf85e207dd474d97c55..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy_V2-8B-Model_Stock/3f8011c6-6826-4788-b848-ec6938eefa7f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Minthy_V2-8B-Model_Stock/1762652579.5770218", - "retrieved_timestamp": "1762652579.577023", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Minthy_V2-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Minthy_V2-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7125881549843305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5491095928821667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1593655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4198854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37367021276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minus_Penus-8B-Model_Stock/395b9855-e394-46c9-b95a-75203399aed4.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minus_Penus-8B-Model_Stock/395b9855-e394-46c9-b95a-75203399aed4.json deleted file mode 100644 index 8b32069def018577458e9e30d6b59aa83f224271..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minus_Penus-8B-Model_Stock/395b9855-e394-46c9-b95a-75203399aed4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Minus_Penus-8B-Model_Stock/1762652579.577236", - "retrieved_timestamp": "1762652579.577237", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Minus_Penus-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Minus_Penus-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7311477989512272 - } - }, - 
{ - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5343781571200968 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2001510574018127 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40190624999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3751662234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Not_Even_My_Final_Form-8B-Model_Stock/bc85d435-a537-4ed0-bf4e-02d9c30b5fa3.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Not_Even_My_Final_Form-8B-Model_Stock/bc85d435-a537-4ed0-bf4e-02d9c30b5fa3.json deleted file mode 100644 index 116766542f3ad6e7de0713678656c73d343290ec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Not_Even_My_Final_Form-8B-Model_Stock/bc85d435-a537-4ed0-bf4e-02d9c30b5fa3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Not_Even_My_Final_Form-8B-Model_Stock/1762652579.577775", - "retrieved_timestamp": "1762652579.5777762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7721889032212308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5350849793007441 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41473958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3839760638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nother_One-8B-Model_Stock/464f363d-ab94-4cac-8846-fbcf25be3dec.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nother_One-8B-Model_Stock/464f363d-ab94-4cac-8846-fbcf25be3dec.json deleted file mode 100644 index 25711f8bd2f37ef3c13bec5be78fca37257d2668..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nother_One-8B-Model_Stock/464f363d-ab94-4cac-8846-fbcf25be3dec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Nother_One-8B-Model_Stock/1762652579.578036", - "retrieved_timestamp": "1762652579.578037", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Nother_One-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Nother_One-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6863101016414226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5204527600425481 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38702083333333337 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35945811170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Noxis-8B-LINEAR/8778fbef-d0f0-4a47-8adb-8e8f594d9195.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Noxis-8B-LINEAR/8778fbef-d0f0-4a47-8adb-8e8f594d9195.json deleted file mode 100644 index dd11d492689843f00967530e22dd0c0857372fbd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Noxis-8B-LINEAR/8778fbef-d0f0-4a47-8adb-8e8f594d9195.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Noxis-8B-LINEAR/1762652579.578263", - "retrieved_timestamp": "1762652579.578263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Noxis-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Noxis-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6913057354486096 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5420956502068554 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19788519637462235 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4230833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3660239361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nullsworn-12B-LINEAR/3f92cd91-57b4-46eb-864b-2e4870b920fc.json 
b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nullsworn-12B-LINEAR/3f92cd91-57b4-46eb-864b-2e4870b920fc.json deleted file mode 100644 index 714856aa45edfcba46fd5a1e2edaa060774ec02f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nullsworn-12B-LINEAR/3f92cd91-57b4-46eb-864b-2e4870b920fc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Nullsworn-12B-LINEAR/1762652579.578492", - "retrieved_timestamp": "1762652579.5784929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Nullsworn-12B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Nullsworn-12B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44356086295473784 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5483045026677609 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43495833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3645279255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nwah-8B-Model_Stock/34dec14e-846a-4037-8dbd-f1d1599d5adf.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nwah-8B-Model_Stock/34dec14e-846a-4037-8dbd-f1d1599d5adf.json deleted file mode 100644 index ed1ad924ae656b7aaf6b34aeb4d65ef4c589f70d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nwah-8B-Model_Stock/34dec14e-846a-4037-8dbd-f1d1599d5adf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Nwah-8B-Model_Stock/1762652579.578718", - "retrieved_timestamp": "1762652579.578719", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Nwah-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Nwah-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7715893828375378 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5384269019541996 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4039479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3807347074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ONeil-model_stock-8B/7f5fa4e0-e28c-46df-acbd-22e7b010a407.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ONeil-model_stock-8B/7f5fa4e0-e28c-46df-acbd-22e7b010a407.json deleted file mode 100644 index 4a4a7f16273ef9cb67cffe05f37f512fc3dc2eb0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ONeil-model_stock-8B/7f5fa4e0-e28c-46df-acbd-22e7b010a407.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_ONeil-model_stock-8B/1762652579.578939", - "retrieved_timestamp": "1762652579.57894", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/ONeil-model_stock-8B", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/ONeil-model_stock-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6785662043378236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5548337982400763 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41734374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35987367021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Oh_Boy-8B-LINEAR/393ad85d-6b8b-466d-99e0-6a89bf0ce66e.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Oh_Boy-8B-LINEAR/393ad85d-6b8b-466d-99e0-6a89bf0ce66e.json deleted file mode 100644 index 68abbce7f57a5c57d8f956ef8758fa907b756f21..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Oh_Boy-8B-LINEAR/393ad85d-6b8b-466d-99e0-6a89bf0ce66e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Oh_Boy-8B-LINEAR/1762652579.5791628", - "retrieved_timestamp": "1762652579.5791638", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Oh_Boy-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Oh_Boy-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7503069633018169 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5375114406292553 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4107708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3848902925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_OrangeJ-8B-Model_Stock/d436f2a4-ebd5-4712-871a-0616f491bda4.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_OrangeJ-8B-Model_Stock/d436f2a4-ebd5-4712-871a-0616f491bda4.json deleted file mode 100644 index 07b0a699eff7c6a3bb1eeb0c344b2120b99760f3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_OrangeJ-8B-Model_Stock/d436f2a4-ebd5-4712-871a-0616f491bda4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_OrangeJ-8B-Model_Stock/1762652579.57939", - "retrieved_timestamp": "1762652579.579391", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/OrangeJ-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/OrangeJ-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7841039552830933 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5413478053905038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.4027708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3968583776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Promissum_Mane-8B-LINEAR-lorablated/827c075e-78a2-4e4b-a561-b95728cdf2b2.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Promissum_Mane-8B-LINEAR-lorablated/827c075e-78a2-4e4b-a561-b95728cdf2b2.json deleted file mode 100644 index 4ce48a0783d18370d67a4d2bf8d930ad9168df4e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Promissum_Mane-8B-LINEAR-lorablated/827c075e-78a2-4e4b-a561-b95728cdf2b2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Promissum_Mane-8B-LINEAR-lorablated/1762652579.579823", - "retrieved_timestamp": "1762652579.5798242", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Promissum_Mane-8B-LINEAR-lorablated", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Promissum_Mane-8B-LINEAR-lorablated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7156356245872064 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5435183631990302 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37391954787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Promissum_Mane-8B-LINEAR/d44a7888-1463-4492-9359-f8287a8f7f01.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Promissum_Mane-8B-LINEAR/d44a7888-1463-4492-9359-f8287a8f7f01.json deleted file mode 100644 index 63d99304488e2eac930079140b6c421d902eb583..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Promissum_Mane-8B-LINEAR/d44a7888-1463-4492-9359-f8287a8f7f01.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Promissum_Mane-8B-LINEAR/1762652579.5796108", - "retrieved_timestamp": "1762652579.579612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Promissum_Mane-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Promissum_Mane-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7150361042035134 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5457684398146738 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555891238670695 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42004166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38505651595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_RPMash-8B-Model_Stock/aa8e7299-0c36-4f27-b8c9-e9a5e4da8c97.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_RPMash-8B-Model_Stock/aa8e7299-0c36-4f27-b8c9-e9a5e4da8c97.json deleted file mode 100644 index 9654d3bd7889a8dd6c0ee603b742fd843242a45f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_RPMash-8B-Model_Stock/aa8e7299-0c36-4f27-b8c9-e9a5e4da8c97.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/DreadPoor_RPMash-8B-Model_Stock/1762652579.5800488", - "retrieved_timestamp": "1762652579.58005", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/RPMash-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/RPMash-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4563502617499346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5169088291675549 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.405375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3603723404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_RPMash_V3-8B-Model_Stock/c7e0c75d-f0c1-4a44-b540-607e99c69e92.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_RPMash_V3-8B-Model_Stock/c7e0c75d-f0c1-4a44-b540-607e99c69e92.json deleted file mode 100644 index d9c6c8429fbfb8c2f069fe9942dbb2abbe46afb0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_RPMash_V3-8B-Model_Stock/c7e0c75d-f0c1-4a44-b540-607e99c69e92.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_RPMash_V3-8B-Model_Stock/1762652579.580262", - "retrieved_timestamp": "1762652579.580263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/RPMash_V3-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - 
"id": "DreadPoor/RPMash_V3-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.70491961329273 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5217453397523113 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37775000000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36136968085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Gold-8B-LINEAR/70f7842f-1111-4c6a-914d-35e48537d1fc.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Gold-8B-LINEAR/70f7842f-1111-4c6a-914d-35e48537d1fc.json deleted file mode 100644 index 867a4619ec1e22a78759da399c22301f493c831b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Gold-8B-LINEAR/70f7842f-1111-4c6a-914d-35e48537d1fc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Rusted_Gold-8B-LINEAR/1762652579.58047", - "retrieved_timestamp": "1762652579.580471", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Rusted_Gold-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Rusted_Gold-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7296240641497892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5386646439313688 - } - 
}, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37799202127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Platinum-8B-LINEAR/4b9a1e5a-dc99-44d9-b4f4-6bef1eb285ca.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Platinum-8B-LINEAR/4b9a1e5a-dc99-44d9-b4f4-6bef1eb285ca.json deleted file mode 100644 index 0a20e5ce836fac2ccce2aab1e57f1aca469f22d5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Platinum-8B-LINEAR/4b9a1e5a-dc99-44d9-b4f4-6bef1eb285ca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Rusted_Platinum-8B-LINEAR/1762652579.580692", - "retrieved_timestamp": "1762652579.580693", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Rusted_Platinum-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Rusted_Platinum-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7179838384375679 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5427868416987739 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17220543806646527 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39666666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37300531914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Platinum-8B-Model_Stock/219e3183-8d9c-4188-a550-72d7f20ff1ec.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Platinum-8B-Model_Stock/219e3183-8d9c-4188-a550-72d7f20ff1ec.json deleted file mode 100644 index 119929b3551355a647120e647e05534c0e356d7c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Platinum-8B-Model_Stock/219e3183-8d9c-4188-a550-72d7f20ff1ec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Rusted_Platinum-8B-Model_Stock/1762652579.580914", - "retrieved_timestamp": "1762652579.580915", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Rusted_Platinum-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Rusted_Platinum-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44078821970150317 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5242840148078765 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37406249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3546376329787234 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sellen-8B-model_stock/45e281e8-f28c-40a5-92e4-c16b627adb32.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sellen-8B-model_stock/45e281e8-f28c-40a5-92e4-c16b627adb32.json deleted file mode 100644 index a0b5b091118d5639f841852d0e0027d6c5be277c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sellen-8B-model_stock/45e281e8-f28c-40a5-92e4-c16b627adb32.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Sellen-8B-model_stock/1762652579.5811431", - "retrieved_timestamp": "1762652579.581144", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Sellen-8B-model_stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Sellen-8B-model_stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7112893788481229 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5231680557624704 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3960416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35696476063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Something-8B-Model_Stock/1d1bf908-44fb-4b87-b52d-845a1cdafc08.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Something-8B-Model_Stock/1d1bf908-44fb-4b87-b52d-845a1cdafc08.json deleted file mode 100644 index e20e1de5f95d61953915bfef98abcc0a7da2c742..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Something-8B-Model_Stock/1d1bf908-44fb-4b87-b52d-845a1cdafc08.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Something-8B-Model_Stock/1762652579.5815392", - "retrieved_timestamp": "1762652579.58154", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Something-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Something-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5043107842746135 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5395029370473196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41873958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3885472074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Spring_Dusk-8B-SCE/e9124a70-037d-41ed-becb-953382a3f43a.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Spring_Dusk-8B-SCE/e9124a70-037d-41ed-becb-953382a3f43a.json deleted file mode 100644 index 7bb2dd3064cd2a82546cbb45c8430a7af1f290f1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Spring_Dusk-8B-SCE/e9124a70-037d-41ed-becb-953382a3f43a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Spring_Dusk-8B-SCE/1762652579.581773", - "retrieved_timestamp": "1762652579.581774", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Spring_Dusk-8B-SCE", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Spring_Dusk-8B-SCE" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6514636719459922 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5635271357931001 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45997916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3435837765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Dawn-8B-SCE/7d7eefa4-193a-4158-a903-9a8484b36e9a.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Dawn-8B-SCE/7d7eefa4-193a-4158-a903-9a8484b36e9a.json deleted file mode 100644 index 6fc6bc4a1655ddd88826813e88f2a22f787f49a2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Dawn-8B-SCE/7d7eefa4-193a-4158-a903-9a8484b36e9a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Dawn-8B-SCE/1762652579.581994", - "retrieved_timestamp": "1762652579.581994", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Summer_Dawn-8B-SCE", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Summer_Dawn-8B-SCE" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6642032030567783 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539111375413361 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17220543806646527 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41204166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37533244680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Dusk-8B-TIES/a2cad434-61a0-40be-8740-6c6a8e3cea25.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Dusk-8B-TIES/a2cad434-61a0-40be-8740-6c6a8e3cea25.json deleted file mode 100644 index 3c8f275e5412301a28a268e69df658da570dec18..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Dusk-8B-TIES/a2cad434-61a0-40be-8740-6c6a8e3cea25.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Dusk-8B-TIES/1762652579.582258", - "retrieved_timestamp": "1762652579.582258", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Summer_Dusk-8B-TIES", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Summer_Dusk-8B-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4922206412319312 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359662578395569 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4266770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3855551861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Rain-8B-SCE/9f4730ec-a162-455c-83ef-c8fa9ebd036c.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Rain-8B-SCE/9f4730ec-a162-455c-83ef-c8fa9ebd036c.json deleted file mode 100644 index 7f34c06e7d9198fa03b34389cef5ed0752502f94..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Rain-8B-SCE/9f4730ec-a162-455c-83ef-c8fa9ebd036c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Rain-8B-SCE/1762652579.582465", - "retrieved_timestamp": "1762652579.5824661", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Summer_Rain-8B-SCE", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Summer_Rain-8B-SCE" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5459259210007226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5845948417986419 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4477291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3550531914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Rain-8B-TIES/1704c33f-e00e-4fbb-be4c-3d1fe85d635f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Rain-8B-TIES/1704c33f-e00e-4fbb-be4c-3d1fe85d635f.json deleted file mode 100644 index f9f31e11a68d48540d310537116db6eaf0209efe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Rain-8B-TIES/1704c33f-e00e-4fbb-be4c-3d1fe85d635f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Rain-8B-TIES/1762652579.582679", - "retrieved_timestamp": "1762652579.582679", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Summer_Rain-8B-TIES", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Summer_Rain-8B-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5444021861992845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5845948417986419 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4477291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3550531914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sun-8B-Model_Stock/13b16b8d-533f-4323-a75a-e16df96b8351.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sun-8B-Model_Stock/13b16b8d-533f-4323-a75a-e16df96b8351.json deleted file mode 100644 index 52aaf65a81581c8cf4c53ac6337eddc89875aa37..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sun-8B-Model_Stock/13b16b8d-533f-4323-a75a-e16df96b8351.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Sun-8B-Model_Stock/1762652579.58288", - "retrieved_timestamp": "1762652579.58288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Sun-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Sun-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7758358932077998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5263511014407583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20996978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38347739361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sweetened_Condensed_Milk-8B-Model_Stock/d0461daa-d106-44ce-9d9c-03a6fef37b45.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sweetened_Condensed_Milk-8B-Model_Stock/d0461daa-d106-44ce-9d9c-03a6fef37b45.json deleted file mode 100644 index 41608e6a0594b25097bb2770a89cd92519ea6416..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sweetened_Condensed_Milk-8B-Model_Stock/d0461daa-d106-44ce-9d9c-03a6fef37b45.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Sweetened_Condensed_Milk-8B-Model_Stock/1762652579.5830941", - "retrieved_timestamp": "1762652579.583095", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7417142071924716 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5406287643522295 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18731117824773413 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4106770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38480718085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST02-Ignore/414bb880-e2b2-43fb-ad9b-f51d7c4b7ad4.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST02-Ignore/414bb880-e2b2-43fb-ad9b-f51d7c4b7ad4.json deleted file mode 100644 index 7b5aa339cc23207f28489c93d899b156516c430e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST02-Ignore/414bb880-e2b2-43fb-ad9b-f51d7c4b7ad4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_TEST02-Ignore/1762652579.583313", - "retrieved_timestamp": "1762652579.583314", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/TEST02-Ignore", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/TEST02-Ignore" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6118964347930158 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5601644306147606 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41985416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3468251329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST03-ignore/ceba83fe-89b2-4b8a-ba7d-ed1ad9acb070.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST03-ignore/ceba83fe-89b2-4b8a-ba7d-ed1ad9acb070.json deleted file mode 100644 index 47b5c3573950ad9013509a6ed893e8ad3eb07c55..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST03-ignore/ceba83fe-89b2-4b8a-ba7d-ed1ad9acb070.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_TEST03-ignore/1762652579.583565", - "retrieved_timestamp": "1762652579.5835662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/TEST03-ignore", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/TEST03-ignore" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6967014189018471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383414134372179 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16540785498489427 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37890625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST06-ignore/15dbba84-b177-4bcd-8874-0153152f0015.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST06-ignore/15dbba84-b177-4bcd-8874-0153152f0015.json deleted file mode 100644 index b603b1f8c298f09fe0c1810ccf4af8640a5b018a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST06-ignore/15dbba84-b177-4bcd-8874-0153152f0015.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_TEST06-ignore/1762652579.583824", - "retrieved_timestamp": "1762652579.5838249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/TEST06-ignore", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/TEST06-ignore" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7322969720342026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5509060880148441 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4224895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3615359042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST07-ignore/39b77252-2729-429b-b220-3b19ca0b6a6c.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST07-ignore/39b77252-2729-429b-b220-3b19ca0b6a6c.json deleted file mode 100644 index 4242a37fa60c85bd57ddd20008da203ca1d5796a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST07-ignore/39b77252-2729-429b-b220-3b19ca0b6a6c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_TEST07-ignore/1762652579.5841951", - "retrieved_timestamp": "1762652579.584198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/TEST07-ignore", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/TEST07-ignore" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7399655137258031 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5561275711510345 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40937500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3879654255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST08-ignore/79b7bdb6-82a7-466f-8d9a-b26211f4ee73.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST08-ignore/79b7bdb6-82a7-466f-8d9a-b26211f4ee73.json deleted file mode 100644 index abeccc73bb8ceeafb83775b19971c5b7b0184e1f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST08-ignore/79b7bdb6-82a7-466f-8d9a-b26211f4ee73.json +++ 
/dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_TEST08-ignore/1762652579.5845299", - "retrieved_timestamp": "1762652579.5845308", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/TEST08-ignore", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/TEST08-ignore" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7466599733152479 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5453519655444978 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40810416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3853058510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Trinas_Nectar-8B-model_stock/922fec6c-cfec-47cf-a374-5676635a5b40.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Trinas_Nectar-8B-model_stock/922fec6c-cfec-47cf-a374-5676635a5b40.json deleted file mode 100644 index d9cf1082676e7226c9b03585b537a0e5ea3161a0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Trinas_Nectar-8B-model_stock/922fec6c-cfec-47cf-a374-5676635a5b40.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Trinas_Nectar-8B-model_stock/1762652579.58478", - "retrieved_timestamp": "1762652579.5847821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"DreadPoor/Trinas_Nectar-8B-model_stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Trinas_Nectar-8B-model_stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7259272064788096 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5256123853406084 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15256797583081572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4067708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36178523936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_UNTESTED-VENN_1.2-8B-Model_Stock/5945660f-40e1-4c49-8f28-581f06b51e59.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_UNTESTED-VENN_1.2-8B-Model_Stock/5945660f-40e1-4c49-8f28-581f06b51e59.json deleted file mode 100644 index 416b273e840fe45cc8fadfcb4e4326f0339307c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_UNTESTED-VENN_1.2-8B-Model_Stock/5945660f-40e1-4c49-8f28-581f06b51e59.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_UNTESTED-VENN_1.2-8B-Model_Stock/1762652579.585024", - "retrieved_timestamp": "1762652579.585025", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47176270074513404 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5475027267486955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4449375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.378656914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_VENN_1.2-8B-Model_Stock/0adfce8d-0070-4375-be96-a34466851101.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_VENN_1.2-8B-Model_Stock/0adfce8d-0070-4375-be96-a34466851101.json deleted file mode 100644 index 7e9d0819e7144c82cfac1c825abcc32430e2d1fb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_VENN_1.2-8B-Model_Stock/0adfce8d-0070-4375-be96-a34466851101.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_VENN_1.2-8B-Model_Stock/1762652579.5852559", - "retrieved_timestamp": "1762652579.585257", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/VENN_1.2-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/VENN_1.2-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7226049105262924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5458812486333333 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42001041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3720910904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_WIP-Acacia-8B-Model_Stock/d28bdd9d-53bb-498f-84cb-7d482f41d005.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_WIP-Acacia-8B-Model_Stock/d28bdd9d-53bb-498f-84cb-7d482f41d005.json deleted file mode 100644 index dd970d1bca4ea43eb5bf47705ecc2d3656ea4b4f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_WIP-Acacia-8B-Model_Stock/d28bdd9d-53bb-498f-84cb-7d482f41d005.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_WIP-Acacia-8B-Model_Stock/1762652579.5854762", - "retrieved_timestamp": "1762652579.585477", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/WIP-Acacia-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/WIP-Acacia-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6246359659038019 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5194665568943516 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4225833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37367021276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_WIP_Damascus-8B-TIES/38e5b086-4a73-4ffa-9b32-eb80405fecb5.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_WIP_Damascus-8B-TIES/38e5b086-4a73-4ffa-9b32-eb80405fecb5.json deleted file mode 100644 index a273083453ed9fd1dd8d26cbccfb7fb9b56f890e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_WIP_Damascus-8B-TIES/38e5b086-4a73-4ffa-9b32-eb80405fecb5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_WIP_Damascus-8B-TIES/1762652579.5856981", - "retrieved_timestamp": "1762652579.5856981", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/WIP_Damascus-8B-TIES", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/WIP_Damascus-8B-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4776326812856554 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5410672913070808 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16540785498489427 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41185416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37608045212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Wannabe-8B-Model_Stock/fafc0425-a4f0-4c5b-8328-5dfca7d6402f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Wannabe-8B-Model_Stock/fafc0425-a4f0-4c5b-8328-5dfca7d6402f.json deleted file mode 100644 index 
e36878b91e9d535f23ea580187f8f0ffbe126511..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Wannabe-8B-Model_Stock/fafc0425-a4f0-4c5b-8328-5dfca7d6402f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Wannabe-8B-Model_Stock/1762652579.585919", - "retrieved_timestamp": "1762652579.58592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Wannabe-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Wannabe-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7204816553411615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5389637944785705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41346875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.383061835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_What_A_Thrill-8B-Model_Stock/b9fadd79-8220-4023-b92a-c38b07a90e8f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_What_A_Thrill-8B-Model_Stock/b9fadd79-8220-4023-b92a-c38b07a90e8f.json deleted file mode 100644 index d3fcab7860be9e6e4ff3c71f584ece901bf569f0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_What_A_Thrill-8B-Model_Stock/b9fadd79-8220-4023-b92a-c38b07a90e8f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_What_A_Thrill-8B-Model_Stock/1762652579.5861409", - "retrieved_timestamp": "1762652579.586142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF 
Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/What_A_Thrill-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/What_A_Thrill-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7064433480941679 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.531144904394377 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40804166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615359042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter-8B-SCE/b351842a-aa2a-494a-8159-c732f071c7c6.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter-8B-SCE/b351842a-aa2a-494a-8159-c732f071c7c6.json deleted file mode 100644 index eb6e9782d605d585bbadcb90b99a60849f32f401..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter-8B-SCE/b351842a-aa2a-494a-8159-c732f071c7c6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Winter-8B-SCE/1762652579.586359", - "retrieved_timestamp": "1762652579.58636", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Winter-8B-SCE", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Winter-8B-SCE" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.7536292592543341 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5261733490323383 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38389295212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Dawn-8B-TIES/21947721-9f9a-4cc2-aa88-e1853f488167.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Dawn-8B-TIES/21947721-9f9a-4cc2-aa88-e1853f488167.json deleted file mode 100644 index a518a2260740a1cb7f7149ab7b9ef19da3cf9b9f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Dawn-8B-TIES/21947721-9f9a-4cc2-aa88-e1853f488167.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Winter_Dawn-8B-TIES/1762652579.586569", - "retrieved_timestamp": "1762652579.58657", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Winter_Dawn-8B-TIES", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Winter_Dawn-8B-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5496482665992899 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5309416142154736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42785416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910405585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Dusk-8B-TIES/cdc03c25-5bfb-4185-8e29-40e1af2ef253.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Dusk-8B-TIES/cdc03c25-5bfb-4185-8e29-40e1af2ef253.json deleted file mode 100644 index 8a68e76c3f205eea6a613bbee64f70d7952bbf02..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Dusk-8B-TIES/cdc03c25-5bfb-4185-8e29-40e1af2ef253.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Winter_Dusk-8B-TIES/1762652579.586781", - "retrieved_timestamp": "1762652579.586782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Winter_Dusk-8B-TIES", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Winter_Dusk-8B-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7152610628687439 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4951882158967103 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3688229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3478224734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Night-8B-Model_Stock/49d98c73-75d8-4629-8cc2-a03592b0f551.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Night-8B-Model_Stock/49d98c73-75d8-4629-8cc2-a03592b0f551.json deleted file mode 100644 index 700ce6efc1870184a934029c494cad05353aa5c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Night-8B-Model_Stock/49d98c73-75d8-4629-8cc2-a03592b0f551.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Winter_Night-8B-Model_Stock/1762652579.587023", - "retrieved_timestamp": "1762652579.587024", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Winter_Night-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Winter_Night-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7040452665593957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5184968441488284 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3666057180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Yafune-8B-Model_Stock/edaf2deb-16a3-4109-84e0-e65498e09d1f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Yafune-8B-Model_Stock/edaf2deb-16a3-4109-84e0-e65498e09d1f.json 
deleted file mode 100644 index 5d6cc5ca49019fb6236d4edb0d220c29d1181499..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Yafune-8B-Model_Stock/edaf2deb-16a3-4109-84e0-e65498e09d1f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Yafune-8B-Model_Stock/1762652579.587391", - "retrieved_timestamp": "1762652579.587392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Yafune-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Yafune-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7533045652202822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5466719512941253 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38505651595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Yearn_V3-8B-Model_Stock/763eec85-4395-43b6-aa79-9ecb024eb7af.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Yearn_V3-8B-Model_Stock/763eec85-4395-43b6-aa79-9ecb024eb7af.json deleted file mode 100644 index c6c7ea1264ee991401f988dfbb22af6c542c3c3e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Yearn_V3-8B-Model_Stock/763eec85-4395-43b6-aa79-9ecb024eb7af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Yearn_V3-8B-Model_Stock/1762652579.587668", - "retrieved_timestamp": "1762652579.587669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": 
"HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Yearn_V3-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Yearn_V3-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7289746760816855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5322019394938072 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18957703927492447 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3908958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3801529255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ZEUS-8B-V17-Abliterated_ALT/538f74e4-2587-43d7-a3fb-7826f3995ad9.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ZEUS-8B-V17-Abliterated_ALT/538f74e4-2587-43d7-a3fb-7826f3995ad9.json deleted file mode 100644 index 685131ff18f7e344e6eada95fdbfeb4c67435f29..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ZEUS-8B-V17-Abliterated_ALT/538f74e4-2587-43d7-a3fb-7826f3995ad9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_ZEUS-8B-V17-Abliterated_ALT/1762652579.587883", - "retrieved_timestamp": "1762652579.587884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/ZEUS-8B-V17-Abliterated_ALT", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/ZEUS-8B-V17-Abliterated_ALT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5511221337163171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5231075970343642 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1903323262839879 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41492708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3890458776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Zelus-8B-Model_Stock/2a1d9c9c-b3e4-49d8-96cb-720e53184db6.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Zelus-8B-Model_Stock/2a1d9c9c-b3e4-49d8-96cb-720e53184db6.json deleted file mode 100644 index 1f5bfd3df85ecaa541ec588f454440aa02375aa7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Zelus-8B-Model_Stock/2a1d9c9c-b3e4-49d8-96cb-720e53184db6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Zelus-8B-Model_Stock/1762652579.5881522", - "retrieved_timestamp": "1762652579.5881522", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Zelus-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Zelus-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.778833495126265 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5307011398651839 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.1646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42140625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38414228723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Zelus_V2-8B-Model_Stock/b385729e-27f8-4bf2-b2c6-674504fcd75b.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Zelus_V2-8B-Model_Stock/b385729e-27f8-4bf2-b2c6-674504fcd75b.json deleted file mode 100644 index 602bc81b0d87f267a6e64c256f37f98bd20520da..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Zelus_V2-8B-Model_Stock/b385729e-27f8-4bf2-b2c6-674504fcd75b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Zelus_V2-8B-Model_Stock/1762652579.588366", - "retrieved_timestamp": "1762652579.5883808", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Zelus_V2-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/Zelus_V2-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7898243327703826 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5344816839912676 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3960729166666667 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38331117021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_hakuchido-8B-MODEL_STOCK/a9d24835-302c-445b-b1fd-89d41e3e7878.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_hakuchido-8B-MODEL_STOCK/a9d24835-302c-445b-b1fd-89d41e3e7878.json deleted file mode 100644 index 32b08e124bf24d877cdecfc9bc0da137e8f6219a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_hakuchido-8B-MODEL_STOCK/a9d24835-302c-445b-b1fd-89d41e3e7878.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_hakuchido-8B-MODEL_STOCK/1762652579.589018", - "retrieved_timestamp": "1762652579.589018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/hakuchido-8B-MODEL_STOCK", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/hakuchido-8B-MODEL_STOCK" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7375175645066203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5398373390214104 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3781582446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ichor-8B-Model_Stock/b1b0d419-e025-488a-a367-6769edfdf8ff.json 
b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ichor-8B-Model_Stock/b1b0d419-e025-488a-a367-6769edfdf8ff.json deleted file mode 100644 index ccef33494daebcca247b040642b81bd34746bbb7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ichor-8B-Model_Stock/b1b0d419-e025-488a-a367-6769edfdf8ff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_ichor-8B-Model_Stock/1762652579.589237", - "retrieved_timestamp": "1762652579.589238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/ichor-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/ichor-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5386319410275846 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084222037759372 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42121875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31507646276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ichor_1.1-8B-Model_Stock/64afccfe-af45-4c26-878a-eb01b56f3524.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ichor_1.1-8B-Model_Stock/64afccfe-af45-4c26-878a-eb01b56f3524.json deleted file mode 100644 index 630eef705154af9433902582544b34053d6c1a75..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ichor_1.1-8B-Model_Stock/64afccfe-af45-4c26-878a-eb01b56f3524.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_ichor_1.1-8B-Model_Stock/1762652579.589439", - "retrieved_timestamp": "1762652579.589439", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/ichor_1.1-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/ichor_1.1-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8096328851890761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.528067770617839 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4067708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3855551861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus-8B-Model_Stock/1f0112d0-46b4-4a2c-9ccc-4872ccbae7a5.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus-8B-Model_Stock/1f0112d0-46b4-4a2c-9ccc-4872ccbae7a5.json deleted file mode 100644 index 54ea922efe4e56fc104fb672124f420ce97a360b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus-8B-Model_Stock/1f0112d0-46b4-4a2c-9ccc-4872ccbae7a5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_inexpertus-8B-Model_Stock/1762652579.589726", - "retrieved_timestamp": "1762652579.589729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/inexpertus-8B-Model_Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/inexpertus-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7795327508787795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5280190470468065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41182291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3790724734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus_1.1-8B-LINEAR/86f45b60-19d1-41fa-8538-3d22ea28a98f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus_1.1-8B-LINEAR/86f45b60-19d1-41fa-8538-3d22ea28a98f.json deleted file mode 100644 index e22bcbbe1f30c2cd213ba29152bd7d90d4a1e80a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus_1.1-8B-LINEAR/86f45b60-19d1-41fa-8538-3d22ea28a98f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_inexpertus_1.1-8B-LINEAR/1762652579.59006", - "retrieved_timestamp": "1762652579.590061", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/inexpertus_1.1-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/inexpertus_1.1-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527050448365891 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5524638802167572 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": 
"Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41734374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38272938829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus_1.2-8B-LINEAR/c2465654-27c4-4cad-94fa-3b0bff1fd242.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus_1.2-8B-LINEAR/c2465654-27c4-4cad-94fa-3b0bff1fd242.json deleted file mode 100644 index 17134c2eb6fe45695d21091d8e3daeac68687964..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus_1.2-8B-LINEAR/c2465654-27c4-4cad-94fa-3b0bff1fd242.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_inexpertus_1.2-8B-LINEAR/1762652579.590318", - "retrieved_timestamp": "1762652579.5903192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/inexpertus_1.2-8B-LINEAR", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/inexpertus_1.2-8B-LINEAR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7347947889377962 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5523440600721518 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15861027190332325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41334374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37882313829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_mergekit-nuslerp-nqzkedi/c1bff8a8-6159-4fe6-a9bd-846846d0e633.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_mergekit-nuslerp-nqzkedi/c1bff8a8-6159-4fe6-a9bd-846846d0e633.json deleted file mode 100644 index 23b82570ec356478a22bec0c6a03b323b9d5574e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_mergekit-nuslerp-nqzkedi/c1bff8a8-6159-4fe6-a9bd-846846d0e633.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_mergekit-nuslerp-nqzkedi/1762652579.590566", - "retrieved_timestamp": "1762652579.590566", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/mergekit-nuslerp-nqzkedi", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/mergekit-nuslerp-nqzkedi" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7764852812759035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5361918366546249 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18806646525679757 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4224583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3918716755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_remember_to_breathe-8b-Model-Stock/76309e63-a135-45cf-9f06-b091215726d0.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_remember_to_breathe-8b-Model-Stock/76309e63-a135-45cf-9f06-b091215726d0.json deleted file mode 100644 index ad964573051367800aa1186536b16a32bfb673f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_remember_to_breathe-8b-Model-Stock/76309e63-a135-45cf-9f06-b091215726d0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_remember_to_breathe-8b-Model-Stock/1762652579.5907981", - "retrieved_timestamp": "1762652579.590799", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/remember_to_breathe-8b-Model-Stock", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/remember_to_breathe-8b-Model-Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7104150321147887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5411654435599922 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1487915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37608045212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_test/a4f14e1c-4c16-4fb8-9753-f05a6c5f2836.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_test/a4f14e1c-4c16-4fb8-9753-f05a6c5f2836.json deleted file mode 100644 index ae885b201954a3daf2c2c512a4f2c43320f34a86..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_test/a4f14e1c-4c16-4fb8-9753-f05a6c5f2836.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/DreadPoor_test/1762652579.5910451", - "retrieved_timestamp": "1762652579.5910459", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/test", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49369450834895856 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5371873804638203 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4350833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646941489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_test_ALT/1ca8f31a-4df9-4eb5-8ded-506d80246cdd.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_test_ALT/1ca8f31a-4df9-4eb5-8ded-506d80246cdd.json deleted file mode 100644 index d57995380072bdc488e6d2423bab0521e5b21c14..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_test_ALT/1ca8f31a-4df9-4eb5-8ded-506d80246cdd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_test_ALT/1762652579.591327", - "retrieved_timestamp": "1762652579.591328", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/test_ALT", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/test_ALT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.499689712185889 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370433315307738 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4362916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3492353723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_tests_pending-do_not_use_yet/de113d87-7875-4f5c-89eb-48a59797b19b.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_tests_pending-do_not_use_yet/de113d87-7875-4f5c-89eb-48a59797b19b.json deleted file mode 100644 index ca04950268808ba8f2cbb427f6c9a15b6f40593e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_tests_pending-do_not_use_yet/de113d87-7875-4f5c-89eb-48a59797b19b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_tests_pending-do_not_use_yet/1762652579.591608", - "retrieved_timestamp": "1762652579.591609", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/tests_pending-do_not_use_yet", - "developer": "DreadPoor", - "inference_platform": "unknown", - "id": "DreadPoor/tests_pending-do_not_use_yet" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7691414336183549 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5407897873885027 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19788519637462235 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40047916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38272938829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ECE-ILAB-PRYMMAL/ECE-ILAB-PRYMMAL_ILAB-Merging-3B-V2/cbdf2130-1b6a-43ae-a503-4fc7acf14a76.json b/leaderboard_data/HFOpenLLMv2/ECE-ILAB-PRYMMAL/ECE-ILAB-PRYMMAL_ILAB-Merging-3B-V2/cbdf2130-1b6a-43ae-a503-4fc7acf14a76.json deleted file mode 100644 index 356f857f52e678ca0e3de02c27a1a59709870d40..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ECE-ILAB-PRYMMAL/ECE-ILAB-PRYMMAL_ILAB-Merging-3B-V2/cbdf2130-1b6a-43ae-a503-4fc7acf14a76.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ECE-ILAB-PRYMMAL_ILAB-Merging-3B-V2/1762652579.5918348", - "retrieved_timestamp": "1762652579.591836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2", - "developer": "ECE-ILAB-PRYMMAL", - "inference_platform": "unknown", - "id": "ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40289432040319684 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5401935891431586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43321875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38605385638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Edgerunners/Edgerunners_meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1e2cd0e7-ce74-4eac-86fb-64412d1d2094.json b/leaderboard_data/HFOpenLLMv2/Edgerunners/Edgerunners_meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1e2cd0e7-ce74-4eac-86fb-64412d1d2094.json deleted file mode 100644 index 335566455cc3cbdf2826b811f17b60cd85f39060..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Edgerunners/Edgerunners_meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1e2cd0e7-ce74-4eac-86fb-64412d1d2094.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Edgerunners_meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1762652579.592541", - "retrieved_timestamp": "1762652579.592542", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16", - "developer": "Edgerunners", - "inference_platform": "unknown", - "id": "Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7147114101694614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4979908369885237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36361369680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-1.4b/e268be37-589d-41f2-af98-a85bb412eb44.json b/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-1.4b/e268be37-589d-41f2-af98-a85bb412eb44.json deleted file mode 100644 index 88531211e10c4a7ee09b9562d8a95051489490af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-1.4b/e268be37-589d-41f2-af98-a85bb412eb44.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-1.4b/1762652579.593903", - "retrieved_timestamp": "1762652579.593904", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EleutherAI/pythia-1.4b", - "developer": "EleutherAI", - "inference_platform": "unknown", - "id": "EleutherAI/pythia-1.4b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23708094522533543 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.315042649740714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 1.515 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-12b/4df16bb2-996f-473f-9096-a8a8e152ca9b.json b/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-12b/4df16bb2-996f-473f-9096-a8a8e152ca9b.json deleted file mode 100644 index 8f888f322b651bb19d749df08f4e09ca2f2153be..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-12b/4df16bb2-996f-473f-9096-a8a8e152ca9b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-12b/1762652579.5942001", - "retrieved_timestamp": "1762652579.594201", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EleutherAI/pythia-12b", - "developer": "EleutherAI", - "inference_platform": "unknown", - "id": "EleutherAI/pythia-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24714756845170813 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179653957935337 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11087101063829788 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 12.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-160m/d59ad4b0-e58e-48d6-90eb-93398c46251a.json b/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-160m/d59ad4b0-e58e-48d6-90eb-93398c46251a.json deleted file mode 100644 index 41f5ef5a30236e493c972e58c94722bcfa2bb4be..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-160m/d59ad4b0-e58e-48d6-90eb-93398c46251a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-160m/1762652579.5944068", - "retrieved_timestamp": "1762652579.594408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - 
"model_info": { - "name": "EleutherAI/pythia-160m", - "developer": "EleutherAI", - "inference_platform": "unknown", - "id": "EleutherAI/pythia-160m" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18155161637787737 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2970437484241321 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4179375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11195146276595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 0.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-1b/a21cc55c-e9df-46ef-beed-b67a1750ddb7.json b/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-1b/a21cc55c-e9df-46ef-beed-b67a1750ddb7.json deleted file mode 100644 index be335ed366caa17a157428b38fd7c84a6e4b3d46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-1b/a21cc55c-e9df-46ef-beed-b67a1750ddb7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-1b/1762652579.594618", - "retrieved_timestamp": "1762652579.594618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EleutherAI/pythia-1b", - "developer": "EleutherAI", - "inference_platform": "unknown", - "id": "EleutherAI/pythia-1b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2207941594968018 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3004093017564394 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35520833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11361369680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 1.079 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-2.8b/0afcbde6-b822-4264-8733-bc255ea73314.json b/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-2.8b/0afcbde6-b822-4264-8733-bc255ea73314.json deleted file mode 100644 index 4d8b20a0c24dc66a27ded7975ce0f8af020cc64f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-2.8b/0afcbde6-b822-4264-8733-bc255ea73314.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-2.8b/1762652579.594833", - "retrieved_timestamp": "1762652579.5948339", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EleutherAI/pythia-2.8b", - "developer": "EleutherAI", - "inference_platform": "unknown", - "id": "EleutherAI/pythia-2.8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21732226049105263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3224085936276087 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": 
{ - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3485729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11369680851063829 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 2.909 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-410m/c9db5f06-9aac-4678-bfe0-65773ece4558.json b/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-410m/c9db5f06-9aac-4678-bfe0-65773ece4558.json deleted file mode 100644 index 6699e12e9a4c9aaa6ad66bc04138b2cc6825aac6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-410m/c9db5f06-9aac-4678-bfe0-65773ece4558.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-410m/1762652579.5950441", - "retrieved_timestamp": "1762652579.595045", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EleutherAI/pythia-410m", - "developer": "EleutherAI", - "inference_platform": "unknown", - "id": "EleutherAI/pythia-410m" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21954525104500505 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.302813387064426 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35781250000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 0.506 - } -} \ No newline at end of file 
diff --git a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-6.9b/6ae207e3-2596-4b28-b058-d47d07465192.json b/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-6.9b/6ae207e3-2596-4b28-b058-d47d07465192.json deleted file mode 100644 index 8acb3bcdcf04483476cd4130dd17f4802a1e08f6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-6.9b/6ae207e3-2596-4b28-b058-d47d07465192.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-6.9b/1762652579.595358", - "retrieved_timestamp": "1762652579.595359", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EleutherAI/pythia-6.9b", - "developer": "EleutherAI", - "inference_platform": "unknown", - "id": "EleutherAI/pythia-6.9b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22811362739752744 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3232287869322383 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3590520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 6.9 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EnnoAi/EnnoAi_EnnoAi-7B-French-Instruct-202502/75939d35-c0ca-4256-b667-fe6042ca5979.json b/leaderboard_data/HFOpenLLMv2/EnnoAi/EnnoAi_EnnoAi-7B-French-Instruct-202502/75939d35-c0ca-4256-b667-fe6042ca5979.json deleted file mode 100644 index fedade8dc97a08f961367e05d5e51140b44a40e7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EnnoAi/EnnoAi_EnnoAi-7B-French-Instruct-202502/75939d35-c0ca-4256-b667-fe6042ca5979.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/EnnoAi_EnnoAi-7B-French-Instruct-202502/1762652579.596549", - "retrieved_timestamp": "1762652579.59655", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EnnoAi/EnnoAi-7B-French-Instruct-202502", - "developer": "EnnoAi", - "inference_platform": "unknown", - "id": "EnnoAi/EnnoAi-7B-French-Instruct-202502" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5564424615575562 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5574545199388612 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723564954682779 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45997916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4013464095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Azure_Dusk-v0.2/79790560-846a-48fb-b37a-462162eb0e97.json b/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Azure_Dusk-v0.2/79790560-846a-48fb-b37a-462162eb0e97.json deleted file mode 100644 index bc455d7884f2115848fa87a1405d67a24d48341a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Azure_Dusk-v0.2/79790560-846a-48fb-b37a-462162eb0e97.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Epiculous_Azure_Dusk-v0.2/1762652579.5970619", - "retrieved_timestamp": "1762652579.5970628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Epiculous/Azure_Dusk-v0.2", - "developer": "Epiculous", - "inference_platform": "unknown", - "id": 
"Epiculous/Azure_Dusk-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.346715603487635 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4119721873553597 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3834583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3034408244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Crimson_Dawn-v0.2/91b7917e-a908-4281-9a4d-a2c1e7558105.json b/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Crimson_Dawn-v0.2/91b7917e-a908-4281-9a4d-a2c1e7558105.json deleted file mode 100644 index b65e8093479c366f6c448e111e2a6446c2523a47..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Crimson_Dawn-v0.2/91b7917e-a908-4281-9a4d-a2c1e7558105.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Epiculous_Crimson_Dawn-v0.2/1762652579.5973198", - "retrieved_timestamp": "1762652579.5973198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Epiculous/Crimson_Dawn-v0.2", - "developer": "Epiculous", - "inference_platform": "unknown", - "id": "Epiculous/Crimson_Dawn-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3103454389907667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44823796489645434 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4151770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27210771276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_NovaSpark/9270e697-84b1-46c5-afcc-481065f2be8f.json b/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_NovaSpark/9270e697-84b1-46c5-afcc-481065f2be8f.json deleted file mode 100644 index d68250ff8d0f40e8e7073b67ea11b0d6f53b1467..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_NovaSpark/9270e697-84b1-46c5-afcc-481065f2be8f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Epiculous_NovaSpark/1762652579.597535", - "retrieved_timestamp": "1762652579.597536", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Epiculous/NovaSpark", - "developer": "Epiculous", - "inference_platform": "unknown", - "id": "Epiculous/NovaSpark" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6408473960203371 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5063958663768304 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3881979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648603723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Violet_Twilight-v0.2/83990950-a34c-463f-9a1a-d9371910da6f.json b/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Violet_Twilight-v0.2/83990950-a34c-463f-9a1a-d9371910da6f.json deleted file mode 100644 index f9ef64fdb02367ae3369b0148a88098d5c94b3ee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Violet_Twilight-v0.2/83990950-a34c-463f-9a1a-d9371910da6f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Epiculous_Violet_Twilight-v0.2/1762652579.597749", - "retrieved_timestamp": "1762652579.59775", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Epiculous/Violet_Twilight-v0.2", - "developer": "Epiculous", - "inference_platform": "unknown", - "id": "Epiculous/Violet_Twilight-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45317756885064964 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4614552476845888 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02870090634441088 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42993750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3110871010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_DeepPhi-3.5-mini-instruct/b367fb18-f302-41ec-a5f9-7d47766ca6f3.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_DeepPhi-3.5-mini-instruct/b367fb18-f302-41ec-a5f9-7d47766ca6f3.json deleted file mode 100644 index d54a01d07e9f42146b5c0e5fa37ee8aa06dfb1f4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_DeepPhi-3.5-mini-instruct/b367fb18-f302-41ec-a5f9-7d47766ca6f3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_DeepPhi-3.5-mini-instruct/1762652579.5991712", - "retrieved_timestamp": "1762652579.599172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/DeepPhi-3.5-mini-instruct", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/DeepPhi-3.5-mini-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1325915238234551 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28822860667627487 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2332214765100671 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36562500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11028922872340426 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_FineLlama3.1-8B-Instruct/a99828d9-a521-4b46-bd81-e791fae7bcf8.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_FineLlama3.1-8B-Instruct/a99828d9-a521-4b46-bd81-e791fae7bcf8.json deleted file mode 100644 index feac98594908fe1a50c8835a2e890f86358c8765..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_FineLlama3.1-8B-Instruct/a99828d9-a521-4b46-bd81-e791fae7bcf8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/EpistemeAI_FineLlama3.1-8B-Instruct/1762652579.5997", - "retrieved_timestamp": "1762652579.599701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/FineLlama3.1-8B-Instruct", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/FineLlama3.1-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08000992921005155 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45573635384163325 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3481666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3112533244680851 - } - } - ], - "additional_details": { - "precision": "4bit", - "architecture": "?", - "params_billions": 14.483 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-12B/bdb69cfa-cce7-4813-babb-b6f987be90de.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-12B/bdb69cfa-cce7-4813-babb-b6f987be90de.json deleted file mode 100644 index a3bec4c83b875225f98b048992df67f4f5bbc606..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-12B/bdb69cfa-cce7-4813-babb-b6f987be90de.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-12B/1762652579.59992", - "retrieved_timestamp": "1762652579.59992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-12B", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-12B" 
- }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1833501775289565 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110893652548262 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42363541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3343583776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/627a984d-8a4b-4a10-ac9e-05ccdbcc1835.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/627a984d-8a4b-4a10-ac9e-05ccdbcc1835.json deleted file mode 100644 index d7c6ece5eb912c1bff93e7a8a68554669a9f13cd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/627a984d-8a4b-4a10-ac9e-05ccdbcc1835.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/1762652579.600397", - "retrieved_timestamp": "1762652579.600397", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4577243934981405 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4838398624677178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39445833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35829454787234044 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/b8b22223-7ef6-4fec-9928-68de2ce516e6.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/b8b22223-7ef6-4fec-9928-68de2ce516e6.json deleted file mode 100644 index d680108c8db153aec975068fb3f27918b6f7d2e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/b8b22223-7ef6-4fec-9928-68de2ce516e6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/1762652579.601048", - "retrieved_timestamp": "1762652579.6010492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44318630123627534 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4823644760491404 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4066458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3515625 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/7268e623-7dc3-4a79-b410-3f2efdbb6b1b.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/7268e623-7dc3-4a79-b410-3f2efdbb6b1b.json deleted file mode 100644 index 35487c5e8d6aabc2a9b94e3c6154649bdfe3e14c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/7268e623-7dc3-4a79-b410-3f2efdbb6b1b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/1762652579.6022642", - "retrieved_timestamp": "1762652579.6022651", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7207066140063919 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4610092915501656 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3432395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3353557180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/ba8d6727-fe89-4bab-95a2-5f70d77034dc.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/ba8d6727-fe89-4bab-95a2-5f70d77034dc.json deleted file mode 100644 index efd0ead2d0a647115c40e7d1c891e7f94f156b61..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/ba8d6727-fe89-4bab-95a2-5f70d77034dc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/1762652579.601946", - "retrieved_timestamp": "1762652579.6019468", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7304984108831234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46492466713692354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32088541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34798869680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1ad587be-8544-4c37-bb8c-e21ad685039c.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1ad587be-8544-4c37-bb8c-e21ad685039c.json deleted file mode 100644 index 89ea0a504ecd8e38f0539c834b2eb6a90da79701..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1ad587be-8544-4c37-bb8c-e21ad685039c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1762652579.60172", - "retrieved_timestamp": "1762652579.601721", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.669099101495144 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4668070143164938 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34178125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33892952127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/5f40e687-560e-4846-bbc1-4c2300680d4b.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/5f40e687-560e-4846-bbc1-4c2300680d4b.json deleted file mode 100644 index 9f28c1467593e4c17039d298b45c63823996b629..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/5f40e687-560e-4846-bbc1-4c2300680d4b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/1762652579.601493", - "retrieved_timestamp": "1762652579.601493", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5975334335119704 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4904191122627008 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40103125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34225398936170215 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/839b6ee8-2f25-4b53-abec-a0a9dd198f04.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/839b6ee8-2f25-4b53-abec-a0a9dd198f04.json deleted file mode 100644 index 
6d128b3261b15738032149a5b2107814e2823151..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/839b6ee8-2f25-4b53-abec-a0a9dd198f04.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/1762652579.6012669", - "retrieved_timestamp": "1762652579.601268", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4457339858242796 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48973199216860547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37622916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3543051861702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/6f29d957-8b65-4ee7-96dd-da2477023403.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/6f29d957-8b65-4ee7-96dd-da2477023403.json deleted file mode 100644 index 5213420b82165d0ed9a8f8297e7c9744a06f62e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/6f29d957-8b65-4ee7-96dd-da2477023403.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/1762652579.6025012", - "retrieved_timestamp": "1762652579.6025019", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4578241288669619 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4760520079608936 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3881354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3470744680851064 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/c39007d8-b4b8-485a-88af-39d18a6007c3.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/c39007d8-b4b8-485a-88af-39d18a6007c3.json deleted file mode 100644 index a9a5f3042e39bde250f5cb243b7ee99fbd1544a0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/c39007d8-b4b8-485a-88af-39d18a6007c3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/1762652579.602742", - "retrieved_timestamp": "1762652579.6027431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF 
Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7204816553411615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4817795525811035 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35480385638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Math/506bb9ca-e322-4ee3-b2d6-96e334a99473.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Math/506bb9ca-e322-4ee3-b2d6-96e334a99473.json deleted file mode 100644 index bc1dc68fea01f82a9cc7c086ba5db9d0e474881f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Math/506bb9ca-e322-4ee3-b2d6-96e334a99473.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Math/1762652579.602981", - "retrieved_timestamp": "1762652579.6029818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46229559790245434 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49829504320793055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33311170212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/e351aba3-7a05-400b-abbf-d09c1fe333e3.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/e351aba3-7a05-400b-abbf-d09c1fe333e3.json deleted file mode 100644 index 67c67047e4659a09c8a09038753f2ef8e32ecfb6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/e351aba3-7a05-400b-abbf-d09c1fe333e3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/1762652579.60321", - "retrieved_timestamp": "1762652579.603211", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46109655713506825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48010141537970213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3998229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35206117021276595 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Mistral-Nemo-Base-2407-v1-DPO2/6a0cc28d-d7bc-454d-ab7c-93c823256f30.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Mistral-Nemo-Base-2407-v1-DPO2/6a0cc28d-d7bc-454d-ab7c-93c823256f30.json deleted file mode 100644 index d26513a5e5aafecf835774796a0a262a52978150..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Mistral-Nemo-Base-2407-v1-DPO2/6a0cc28d-d7bc-454d-ab7c-93c823256f30.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Mistral-Nemo-Base-2407-v1-DPO2/1762652579.603439", - "retrieved_timestamp": "1762652579.60344", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18607295309778055 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49677687590350894 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33527260638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Mistral-Nemo-Instruct-12B-Philosophy-Math/ee2ab45a-4a93-4942-8510-aef93b39b7e3.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Mistral-Nemo-Instruct-12B-Philosophy-Math/ee2ab45a-4a93-4942-8510-aef93b39b7e3.json deleted file mode 100644 index 4e763c824945e708f9b80d7d412a264c55b547bc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Mistral-Nemo-Instruct-12B-Philosophy-Math/ee2ab45a-4a93-4942-8510-aef93b39b7e3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Mistral-Nemo-Instruct-12B-Philosophy-Math/1762652579.6045282", - "retrieved_timestamp": "1762652579.604529", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06946790072563022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364928342081372 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.42921875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32962101063829785 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/644cdea0-49f2-43b9-b94d-55d31c0e0d54.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/644cdea0-49f2-43b9-b94d-55d31c0e0d54.json deleted file mode 100644 index 00987b14c3a0a9db319e5d86f319d6d7eaba78b5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/644cdea0-49f2-43b9-b94d-55d31c0e0d54.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/1762652579.6049678", - "retrieved_timestamp": "1762652579.6049678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7100903380807368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46279874531423665 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3194895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.33111702127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/e2422bfe-8569-4181-8ec1-955086bbb8bb.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/e2422bfe-8569-4181-8ec1-955086bbb8bb.json deleted file mode 100644 index 018d579060839a24c9ec6f95bb00f80cdd62cb21..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/e2422bfe-8569-4181-8ec1-955086bbb8bb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/1762652579.605414", - "retrieved_timestamp": "1762652579.6054149", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.712213593265868 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45659361690861294 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32348958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33502327127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/98c2fc89-acc4-4740-9d24-c9e9c2cd9ad7.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/98c2fc89-acc4-4740-9d24-c9e9c2cd9ad7.json deleted file mode 100644 index 9e0d4955a4a6e46d18bc96475e79cc8ab0c15f32..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/98c2fc89-acc4-4740-9d24-c9e9c2cd9ad7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/1762652579.605665", - "retrieved_timestamp": "1762652579.6056662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6915306941138402 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4524732961901791 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32903922872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.2/3e1fd9a0-a037-4278-baaa-b444d3723557.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.2/3e1fd9a0-a037-4278-baaa-b444d3723557.json deleted file mode 
100644 index ed8de8fc9f322f9b6ffa1d7d78cbc917b9b0ff47..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.2/3e1fd9a0-a037-4278-baaa-b444d3723557.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.2/1762652579.606377", - "retrieved_timestamp": "1762652579.606377", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40871443325930756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3324495305251265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11785239361702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.3/9c141030-9c3f-4e80-8b97-9297f3d81df6.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.3/9c141030-9c3f-4e80-8b97-9297f3d81df6.json deleted file mode 100644 index 5b768a6f9228f04ba8b86cb108496fef890795b8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.3/9c141030-9c3f-4e80-8b97-9297f3d81df6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.3/1762652579.606596", - "retrieved_timestamp": "1762652579.6065972", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3272816127874041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3262818751942827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.326 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11727061170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/d09af70f-bb55-40e8-88f2-a78f20c90b8e.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/d09af70f-bb55-40e8-88f2-a78f20c90b8e.json deleted file mode 100644 index a22033ab2aaa066273f47ac589c1e6efddaf82fd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/d09af70f-bb55-40e8-88f2-a78f20c90b8e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/1762652579.6070201", - "retrieved_timestamp": "1762652579.607021", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO", - "developer": "EpistemeAI", - "inference_platform": 
"unknown", - "id": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7289746760816855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45181862491313 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3100066489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1/099d3be6-bd40-416f-90a1-582f66049c54.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1/099d3be6-bd40-416f-90a1-582f66049c54.json deleted file mode 100644 index 19c3ea30d9fad81d1644f53df7496667a37a08de..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1/099d3be6-bd40-416f-90a1-582f66049c54.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1/1762652579.606812", - "retrieved_timestamp": "1762652579.606813", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5119538380386264 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43810846923178864 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34352083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2789228723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/03d616a2-9a52-4014-8ecf-94dc93a5b4d2.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/03d616a2-9a52-4014-8ecf-94dc93a5b4d2.json deleted file mode 100644 index 088a1f9c883cda82eebc803fb119067e5e0b1d08..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/03d616a2-9a52-4014-8ecf-94dc93a5b4d2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/1762652579.60724", - "retrieved_timestamp": "1762652579.607241", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5902893212232432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.436379591348482 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3314270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28233045212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-0/9835468b-c049-4562-8633-864d29c7bb75.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-0/9835468b-c049-4562-8633-864d29c7bb75.json deleted file mode 100644 index 7647636ddc98d1ff8da7159306da626dee974b3b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-0/9835468b-c049-4562-8633-864d29c7bb75.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-0/1762652579.60745", - "retrieved_timestamp": "1762652579.60745", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-0", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7341454008696924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44460707451155984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15861027190332325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35539583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3172373670212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-Instruct-r01-Reflect/b3efb02e-5312-48cf-b9e9-e90d3d5d9a7d.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-Instruct-r01-Reflect/b3efb02e-5312-48cf-b9e9-e90d3d5d9a7d.json deleted file mode 100644 index cf9aa82c71bfa3658f9500ed70b3cb21b2df95e0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-Instruct-r01-Reflect/b3efb02e-5312-48cf-b9e9-e90d3d5d9a7d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-Instruct-r01-Reflect/1762652579.607657", - "retrieved_timestamp": "1762652579.607658", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7334960128015887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44496323889512146 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31441156914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git 
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-R01/5b06f64a-5c31-457e-a414-00e35888a6b2.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-R01/5b06f64a-5c31-457e-a414-00e35888a6b2.json
deleted file mode 100644
index 7831b3f28c12f8fa7a7a85e8c64af9abbd23218e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-R01/5b06f64a-5c31-457e-a414-00e35888a6b2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-R01/1762652579.607871",
-  "retrieved_timestamp": "1762652579.607872",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "evaluation_source": {
-    "evaluation_source_name": "HF Open LLM v2",
-    "evaluation_source_type": "leaderboard"
-  },
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party"
-  },
-  "model_info": {
-    "name": "EpistemeAI/ReasoningCore-3B-R01",
-    "developer": "EpistemeAI",
-    "inference_platform": "unknown",
-    "id": "EpistemeAI/ReasoningCore-3B-R01"
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.29760590787998065
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.43725189001258497
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.1299093655589124
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2609060402684564
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.31945833333333334
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.25914228723404253
-      }
-    }
-  ],
-  "additional_details": {
-    "precision": "float16",
-    "architecture": "LlamaForCausalLM",
-    "params_billions": 3.213
-  }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2/83b3c488-c210-4ce7-8f7f-75d0d04d5b02.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2/83b3c488-c210-4ce7-8f7f-75d0d04d5b02.json
deleted file mode 100644
index 7c98fbfb0a2ecb9d9754d4ecb2357e228ccec496..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2/83b3c488-c210-4ce7-8f7f-75d0d04d5b02.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
"hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2/1762652579.6080902", - "retrieved_timestamp": "1762652579.6080909", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-RE1-V2", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-RE1-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7393161256576994 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44623884450165807 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31806848404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2A/512a09c1-6c1c-4120-a659-91809607393a.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2A/512a09c1-6c1c-4120-a659-91809607393a.json deleted file mode 100644 index 2345c24c0a154a6465f538a0efbefc911c14687b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2A/512a09c1-6c1c-4120-a659-91809607393a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2A/1762652579.608308", - "retrieved_timestamp": "1762652579.608309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-RE1-V2A", - "developer": "EpistemeAI", - 
"inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-RE1-V2A" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5732534120577845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4189899823502799 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33520833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2736037234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2B/f92ef151-aa21-4240-8de6-1ff04bec55d9.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2B/f92ef151-aa21-4240-8de6-1ff04bec55d9.json deleted file mode 100644 index 2996bac5b1ab19e0b2d7434683bb0e5d2ba1142d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2B/f92ef151-aa21-4240-8de6-1ff04bec55d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2B/1762652579.60862", - "retrieved_timestamp": "1762652579.6086211", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-RE1-V2B", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-RE1-V2B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5051097753959495 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.41678877951897175 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3448229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26728723404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2C/88cb3df4-7cbb-440a-87d4-9b2a89f3572c.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2C/88cb3df4-7cbb-440a-87d4-9b2a89f3572c.json deleted file mode 100644 index 7ed9cce0dae8a29366420e1714000cd52ef66d9c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2C/88cb3df4-7cbb-440a-87d4-9b2a89f3572c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2C/1762652579.608856", - "retrieved_timestamp": "1762652579.6088572", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-RE1-V2C", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-RE1-V2C" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5057092957796425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41774567831526244 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2691156914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-T1-V1/ec3846e6-d111-4c77-93fb-8d1d8106271a.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-T1-V1/ec3846e6-d111-4c77-93fb-8d1d8106271a.json deleted file mode 100644 index 9bdff3b2250e470fa76e0bd3cfe77b6bf8ba906d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-T1-V1/ec3846e6-d111-4c77-93fb-8d1d8106271a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-T1-V1/1762652579.609117", - "retrieved_timestamp": "1762652579.609117", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-T1-V1", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-T1-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7207564816908026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4516908992961786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31200132978723405 - } - } - ], 
- "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-T1_1/ce5a0509-e68c-40f4-8b7b-c56ba90c0e10.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-T1_1/ce5a0509-e68c-40f4-8b7b-c56ba90c0e10.json deleted file mode 100644 index c302de06d7bbf27fc4cd3c6e5098d8fef4c1012e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-T1_1/ce5a0509-e68c-40f4-8b7b-c56ba90c0e10.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-T1_1/1762652579.609335", - "retrieved_timestamp": "1762652579.6093361", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/ReasoningCore-3B-T1_1", - "developer": "EpistemeAI", - "inference_platform": "unknown", - "id": "EpistemeAI/ReasoningCore-3B-T1_1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7274509412802475 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45239424517060806 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3553645833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3116688829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-12B-v1.2/de86ca37-ffcb-41df-a0d1-68cb545ec1de.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-12B-v1.2/de86ca37-ffcb-41df-a0d1-68cb545ec1de.json deleted file mode 100644 index ea71ead47e8751ffc408b696958fc775b743ad22..0000000000000000000000000000000000000000 --- 
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-12B-v1.2/de86ca37-ffcb-41df-a0d1-68cb545ec1de.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-12B-v1.2/1762652579.609813",
-  "retrieved_timestamp": "1762652579.609814",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "evaluation_source": {
-    "evaluation_source_name": "HF Open LLM v2",
-    "evaluation_source_type": "leaderboard"
-  },
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party"
-  },
-  "model_info": {
-    "name": "EpistemeAI2/Fireball-12B-v1.2",
-    "developer": "EpistemeAI2",
-    "inference_platform": "unknown",
-    "id": "EpistemeAI2/Fireball-12B-v1.2"
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.13553925805750963
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5018583230653281
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.04154078549848943
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2986577181208054
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.4173125
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.33369348404255317
-      }
-    }
-  ],
-  "additional_details": {
-    "precision": "bfloat16",
-    "architecture": "MistralForCausalLM",
-    "params_billions": 12.0
-  }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/7e03e547-5324-4c5d-b364-413014fad7eb.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/7e03e547-5324-4c5d-b364-413014fad7eb.json
deleted file mode 100644
index b4930a65b734bfe9e8a1f075720f1cd9f0f0bd75..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/7e03e547-5324-4c5d-b364-413014fad7eb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/1762652579.610973",
-  "retrieved_timestamp": "1762652579.6109738",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "evaluation_source": {
-    "evaluation_source_name": "HF Open LLM v2",
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo", - "developer": "EpistemeAI2", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4865756193566404 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48807730539009225 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3931875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3614527925531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/0115907a-a473-4f12-8f0b-5dafd729fc44.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/0115907a-a473-4f12-8f0b-5dafd729fc44.json deleted file mode 100644 index c149180bcb9e8e789a16738b82e612fea5612f47..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/0115907a-a473-4f12-8f0b-5dafd729fc44.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/1762652579.61236", - "retrieved_timestamp": "1762652579.612361", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math", - "developer": "EpistemeAI2", - "inference_platform": "unknown", - "id": 
"EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5515465631191904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48075580310342053 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1351963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36925 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3420046542553192 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/63b6d34d-1a59-40b6-b663-1d81544867f2.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/63b6d34d-1a59-40b6-b663-1d81544867f2.json deleted file mode 100644 index 0cc466f16b93e83964519f8d42006821f557ba33..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/63b6d34d-1a59-40b6-b663-1d81544867f2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/1762652579.6125782", - "retrieved_timestamp": "1762652579.612579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT", - "developer": "EpistemeAI2", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
-        "score": 0.4633195476890207
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.4790834283312441
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.11706948640483383
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.31208053691275167
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.37743750000000004
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3564660904255319
-      }
-    }
-  ],
-  "additional_details": {
-    "precision": "float16",
-    "architecture": "LlamaForCausalLM",
-    "params_billions": 8.0
-  }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Eric111/Eric111_CatunaMayo-DPO/ef63850d-6acf-4d04-ac01-7ac407bf3b89.json b/leaderboard_data/HFOpenLLMv2/Eric111/Eric111_CatunaMayo-DPO/ef63850d-6acf-4d04-ac01-7ac407bf3b89.json
deleted file mode 100644
index 1d0067649dbeb839c7fa1d94b094a4765118035b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Eric111/Eric111_CatunaMayo-DPO/ef63850d-6acf-4d04-ac01-7ac407bf3b89.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/Eric111_CatunaMayo-DPO/1762652579.613287",
-  "retrieved_timestamp": "1762652579.613288",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "evaluation_source": {
-    "evaluation_source_name": "HF Open LLM v2",
-    "evaluation_source_type": "leaderboard"
-  },
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party"
-  },
-  "model_info": {
-    "name": "Eric111/CatunaMayo-DPO",
-    "developer": "Eric111",
-    "inference_platform": "unknown",
-    "id": "Eric111/CatunaMayo-DPO"
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.4214539643700936
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5223991323844243
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.08157099697885196
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44503125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3169880319148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Eric111/Eric111_CatunaMayo/9c2ab331-44f5-4306-a57c-5ddb0154ba63.json b/leaderboard_data/HFOpenLLMv2/Eric111/Eric111_CatunaMayo/9c2ab331-44f5-4306-a57c-5ddb0154ba63.json deleted file mode 100644 index bc34dab4f5dc7962beff43d269051756f5f83f95..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Eric111/Eric111_CatunaMayo/9c2ab331-44f5-4306-a57c-5ddb0154ba63.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Eric111_CatunaMayo/1762652579.613048", - "retrieved_timestamp": "1762652579.613049", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Eric111/CatunaMayo", - "developer": "Eric111", - "inference_platform": "unknown", - "id": "Eric111/CatunaMayo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4074156571231 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5243635518600797 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45398958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
-        "score": 0.3178191489361702
-      }
-    }
-  ],
-  "additional_details": {
-    "precision": "bfloat16",
-    "architecture": "MistralForCausalLM",
-    "params_billions": 7.242
-  }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/80ff60c0-820c-425d-8b32-44fc61128c9f.json b/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/80ff60c0-820c-425d-8b32-44fc61128c9f.json
deleted file mode 100644
index 472ac84596958487ed417fff6206e46b207b2fe1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/80ff60c0-820c-425d-8b32-44fc61128c9f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/1762652579.613742",
-  "retrieved_timestamp": "1762652579.613743",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "evaluation_source": {
-    "evaluation_source_name": "HF Open LLM v2",
-    "evaluation_source_type": "leaderboard"
-  },
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party"
-  },
-  "model_info": {
-    "name": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2",
-    "developer": "Etherll",
-    "inference_platform": "unknown",
-    "id": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2"
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.37399322686028624
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5410649663618229
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.16314199395770393
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3238255033557047
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.4649375
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.39777260638297873
-      }
-    }
-  ],
-  "additional_details": {
-    "precision": "bfloat16",
-    "architecture": "Phi3ForCausalLM",
-    "params_billions": 3.821
-  }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties/d3b94b8e-8612-4928-bdba-81226af143b2.json b/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties/d3b94b8e-8612-4928-bdba-81226af143b2.json
deleted file mode 100644
index 8064de8afb39da127dd2f2564c284b86180548e5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties/d3b94b8e-8612-4928-bdba-81226af143b2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties/1762652579.613493",
-  "retrieved_timestamp": "1762652579.613494",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "evaluation_source": {
-    "evaluation_source_name": "HF Open LLM v2",
-    "evaluation_source_type": "leaderboard"
-  },
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party"
-  },
-  "model_info": {
-    "name": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties",
-    "developer": "Etherll",
-    "inference_platform": "unknown",
-    "id": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties"
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3724694920588483
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5410649663618229
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.16314199395770393
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3238255033557047
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.4649375
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.39777260638297873
-      }
-    }
-  ],
-  "additional_details": {
-    "precision": "bfloat16",
-    "architecture": "Phi3ForCausalLM",
-    "params_billions": 3.821
-  }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Qwen2.5-Coder-7B-Instruct-Ties/ea9f32e5-431d-4573-9ac9-25ebfa9c2c9e.json b/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Qwen2.5-Coder-7B-Instruct-Ties/ea9f32e5-431d-4573-9ac9-25ebfa9c2c9e.json
deleted file mode 100644
index a87095004387696d8a3908b63234defae811676e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Qwen2.5-Coder-7B-Instruct-Ties/ea9f32e5-431d-4573-9ac9-25ebfa9c2c9e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/Etherll_Qwen2.5-Coder-7B-Instruct-Ties/1762652579.61485",
-  "retrieved_timestamp": "1762652579.614851",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Etherll/Qwen2.5-Coder-7B-Instruct-Ties", - "developer": "Etherll", - "inference_platform": "unknown", - "id": "Etherll/Qwen2.5-Coder-7B-Instruct-Ties" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5005385709916355 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4895144464043051 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29154078549848944 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3503158244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_SuperHermes/a641d61c-aa42-4bce-afc0-ba7639f0a24e.json b/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_SuperHermes/a641d61c-aa42-4bce-afc0-ba7639f0a24e.json deleted file mode 100644 index 7e1dc17091a6bd7833702ca60855425549f9aa4d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_SuperHermes/a641d61c-aa42-4bce-afc0-ba7639f0a24e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Etherll_SuperHermes/1762652579.615286", - "retrieved_timestamp": "1762652579.615287", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Etherll/SuperHermes", - "developer": "Etherll", - "inference_platform": "unknown", - "id": "Etherll/SuperHermes" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
-        "score": 0.5459015412438996
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5289531792679852
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.16540785498489427
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3238255033557047
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.44004166666666666
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.39486369680851063
-      }
-    }
-  ],
-  "additional_details": {
-    "precision": "bfloat16",
-    "architecture": "LlamaForCausalLM",
-    "params_billions": 8.03
-  }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Chocolatine-Fusion-14B/5d5a7561-8a41-48ea-ae1c-e986ac666f19.json b/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Chocolatine-Fusion-14B/5d5a7561-8a41-48ea-ae1c-e986ac666f19.json
deleted file mode 100644
index 439d59adc85a764f01db3e474a4cbdcc88b6ee74..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Chocolatine-Fusion-14B/5d5a7561-8a41-48ea-ae1c-e986ac666f19.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/FINGU-AI_Chocolatine-Fusion-14B/1762652579.615752",
-  "retrieved_timestamp": "1762652579.615752",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "evaluation_source": {
-    "evaluation_source_name": "HF Open LLM v2",
-    "evaluation_source_type": "leaderboard"
-  },
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party"
-  },
-  "model_info": {
-    "name": "FINGU-AI/Chocolatine-Fusion-14B",
-    "developer": "FINGU-AI",
-    "inference_platform": "unknown",
-    "id": "FINGU-AI/Chocolatine-Fusion-14B"
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.6949028577507679
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.64132285324613
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3851963746223565
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
- "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49402083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5261801861702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 8.367 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_L3-8B/f2a0c2ff-40a4-4a75-93ca-b611c4314dd5.json b/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_L3-8B/f2a0c2ff-40a4-4a75-93ca-b611c4314dd5.json deleted file mode 100644 index 8f2b82f315fd5386fec635448b7184a4fd6529d2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_L3-8B/f2a0c2ff-40a4-4a75-93ca-b611c4314dd5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FINGU-AI_L3-8B/1762652579.615993", - "retrieved_timestamp": "1762652579.615993", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FINGU-AI/L3-8B", - "developer": "FINGU-AI", - "inference_platform": "unknown", - "id": "FINGU-AI/L3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7517309627344335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4985585187130108 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2545317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38283333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.36394614361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Q-Small-3B/11d9d5ea-29f2-412e-af48-858626ebeec5.json b/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Q-Small-3B/11d9d5ea-29f2-412e-af48-858626ebeec5.json deleted file mode 100644 index a78f891cc77e015dae1e004b251177f3d98169be..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Q-Small-3B/11d9d5ea-29f2-412e-af48-858626ebeec5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FINGU-AI_Q-Small-3B/1762652579.616768", - "retrieved_timestamp": "1762652579.61677", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FINGU-AI/Q-Small-3B", - "developer": "FINGU-AI", - "inference_platform": "unknown", - "id": "FINGU-AI/Q-Small-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4145345461154182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43185314557630744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40054166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27900598404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_QwQ-Buddy-32B-Alpha/32836e5d-d413-4e40-8c9c-4cb8c3daa23a.json b/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_QwQ-Buddy-32B-Alpha/32836e5d-d413-4e40-8c9c-4cb8c3daa23a.json deleted file mode 100644 index fe73df295f414c25f056ec7d67e7bd08e6422166..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_QwQ-Buddy-32B-Alpha/32836e5d-d413-4e40-8c9c-4cb8c3daa23a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FINGU-AI_QwQ-Buddy-32B-Alpha/1762652579.617035", - "retrieved_timestamp": "1762652579.617036", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FINGU-AI/QwQ-Buddy-32B-Alpha", - "developer": "FINGU-AI", - "inference_platform": "unknown", - "id": "FINGU-AI/QwQ-Buddy-32B-Alpha" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34464221598691475 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.642442234274039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5059895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294215425531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 19.662 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_RomboUltima-32B/65c5a05d-0b24-4767-88ff-24984fa0f988.json b/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_RomboUltima-32B/65c5a05d-0b24-4767-88ff-24984fa0f988.json deleted file mode 100644 index 04017fd31edcd4baa875d28ac25180305ab169bc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_RomboUltima-32B/65c5a05d-0b24-4767-88ff-24984fa0f988.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FINGU-AI_RomboUltima-32B/1762652579.6173398", - "retrieved_timestamp": "1762652579.617341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FINGU-AI/RomboUltima-32B", - "developer": "FINGU-AI", - "inference_platform": "unknown", - "id": "FINGU-AI/RomboUltima-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6671509372908327 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6938448333620042 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5385196374622356 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4836354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.578873005319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 17.645 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Ultimos-32B/fa69d78a-e112-45ff-80c3-b4eb30d83ed9.json b/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Ultimos-32B/fa69d78a-e112-45ff-80c3-b4eb30d83ed9.json deleted file mode 100644 index c94d143a566d6f2ed4eb2b317b6d34b3e3e2a02f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Ultimos-32B/fa69d78a-e112-45ff-80c3-b4eb30d83ed9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FINGU-AI_Ultimos-32B/1762652579.617578", - "retrieved_timestamp": "1762652579.617579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FINGU-AI/Ultimos-32B", - "developer": "FINGU-AI", - "inference_platform": "unknown", - "id": "FINGU-AI/Ultimos-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1592197591280026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2905531373728777 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32860416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 9.604 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FallenMerick/FallenMerick_Chewy-Lemon-Cookie-11B/f4f2289c-5b3c-4040-9e34-ac20352f45d7.json b/leaderboard_data/HFOpenLLMv2/FallenMerick/FallenMerick_Chewy-Lemon-Cookie-11B/f4f2289c-5b3c-4040-9e34-ac20352f45d7.json deleted file mode 100644 index 979995a7ca74f3dcf405cbb5534dface2614c77b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FallenMerick/FallenMerick_Chewy-Lemon-Cookie-11B/f4f2289c-5b3c-4040-9e34-ac20352f45d7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FallenMerick_Chewy-Lemon-Cookie-11B/1762652579.6178062", - "retrieved_timestamp": "1762652579.6178071", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FallenMerick/Chewy-Lemon-Cookie-11B", - "developer": "FallenMerick", - "inference_platform": "unknown", - "id": "FallenMerick/Chewy-Lemon-Cookie-11B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4875242135312083 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5251122307375103 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45455208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3267121010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Felladrin/Felladrin_Llama-160M-Chat-v1/0885ef86-d7ef-4261-8ccd-f0391c42ffe4.json b/leaderboard_data/HFOpenLLMv2/Felladrin/Felladrin_Llama-160M-Chat-v1/0885ef86-d7ef-4261-8ccd-f0391c42ffe4.json deleted file mode 100644 index ba4f2d907d94ced156e72daea85026c0b537d498..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Felladrin/Felladrin_Llama-160M-Chat-v1/0885ef86-d7ef-4261-8ccd-f0391c42ffe4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Felladrin_Llama-160M-Chat-v1/1762652579.618279", - "retrieved_timestamp": "1762652579.61828", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Felladrin/Llama-160M-Chat-v1", - "developer": "Felladrin", - "inference_platform": "unknown", - "id": "Felladrin/Llama-160M-Chat-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15754642127333254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30360811146348365 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11361369680851063 - } - } - ], 
- "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.162 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Felladrin/Felladrin_Minueza-32M-UltraChat/44324409-5cb3-438a-9751-9ee868b35233.json b/leaderboard_data/HFOpenLLMv2/Felladrin/Felladrin_Minueza-32M-UltraChat/44324409-5cb3-438a-9751-9ee868b35233.json deleted file mode 100644 index 1c5a1d6c34e9991c08c4cae24dd66bb77606a563..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Felladrin/Felladrin_Minueza-32M-UltraChat/44324409-5cb3-438a-9751-9ee868b35233.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Felladrin_Minueza-32M-UltraChat/1762652579.6187", - "retrieved_timestamp": "1762652579.6187022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Felladrin/Minueza-32M-UltraChat", - "developer": "Felladrin", - "inference_platform": "unknown", - "id": "Felladrin/Minueza-32M-UltraChat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13756277787381924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2941478734048925 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37418749999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11328125 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 0.033 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d37d499c-74cc-4fbb-9a3c-80776ebf2b82.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d37d499c-74cc-4fbb-9a3c-80776ebf2b82.json deleted file mode 100644 index 
256a6550122ab3fa717c4d0b58ee2326fc183aee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d37d499c-74cc-4fbb-9a3c-80776ebf2b82.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.618947", - "retrieved_timestamp": "1762652579.618948", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30832191917445706 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3323387445789459 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14976728723404256 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.5 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/fc62bbce-e2e4-4b41-b632-a09eb8b0a4d6.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/fc62bbce-e2e4-4b41-b632-a09eb8b0a4d6.json deleted file mode 100644 index dd47a9c7ce586d2c390cdcefe2910a00c9af41f2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/fc62bbce-e2e4-4b41-b632-a09eb8b0a4d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/1762652579.619225", - "retrieved_timestamp": "1762652579.6192262", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.509730847484674 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5214989784123593 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43095833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37691156914893614 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 16.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/157d1e12-ced4-4b48-a651-5671a2b85ee6.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/157d1e12-ced4-4b48-a651-5671a2b85ee6.json deleted file mode 100644 index 406cf79d502f0a6056676fa4b345760b47dd2dd6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/157d1e12-ced4-4b48-a651-5671a2b85ee6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.619448", - "retrieved_timestamp": "1762652579.6194491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { 
- "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28154408081667753 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3305518729746925 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15408909574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.5 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5450695c-a1fd-431f-9201-19d858e48867.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5450695c-a1fd-431f-9201-19d858e48867.json deleted file mode 100644 index 05e04bde570706a006d697e9911bb4d3266fa49e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5450695c-a1fd-431f-9201-19d858e48867.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.619661", - "retrieved_timestamp": "1762652579.619661", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "developer": 
"FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3015775919006015 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33246082656550385 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3408229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14852061170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.5 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d780dd37-3e71-400a-93be-f9512ad77d3e.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d780dd37-3e71-400a-93be-f9512ad77d3e.json deleted file mode 100644 index 4824ff032d3216165d13d1beaeb22de85dc7bcfa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d780dd37-3e71-400a-93be-f9512ad77d3e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.619875", - "retrieved_timestamp": "1762652579.6198761", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28693976426991497 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33465340701604496 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15550199468085107 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.5 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb/4ba295dd-43f3-45d6-8abe-58cd6fb11eee.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb/4ba295dd-43f3-45d6-8abe-58cd6fb11eee.json deleted file mode 100644 index 09772d0cb7766d3ec9403e0da95dd07f9d29f5a9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb/4ba295dd-43f3-45d6-8abe-58cd6fb11eee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1000k_fineweb/1762652579.620099", - "retrieved_timestamp": "1762652579.6201", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1000k_fineweb", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14845388014911545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2917939408206228 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35806249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163563829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/7d967a13-3d40-4a9c-ac1d-956c2b2b6b98.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/7d967a13-3d40-4a9c-ac1d-956c2b2b6b98.json deleted file mode 100644 index 9820bbe9828317f76fff3802fa4256182e4216cd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/7d967a13-3d40-4a9c-ac1d-956c2b2b6b98.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/1762652579.620331", - "retrieved_timestamp": "1762652579.620332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15537329840379083 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3066426145674803 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - 
} - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35803125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11427859042553191 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/93f69ae3-c779-4f6b-8ac9-9bd8478e7eb2.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/93f69ae3-c779-4f6b-8ac9-9bd8478e7eb2.json deleted file mode 100644 index 80137837760b9dae3db916bc7d42447b44020c57..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/93f69ae3-c779-4f6b-8ac9-9bd8478e7eb2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/1762652579.62055", - "retrieved_timestamp": "1762652579.6205509", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14678054229444543 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29317781029884354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4047604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11569148936170212 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb/3b102085-a3f6-4da6-abdf-f906f0b37f3c.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb/3b102085-a3f6-4da6-abdf-f906f0b37f3c.json deleted file mode 100644 index 77be3598cc29b8f42b4b4d746cd641073d608842..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb/3b102085-a3f6-4da6-abdf-f906f0b37f3c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1200k_fineweb/1762652579.620773", - "retrieved_timestamp": "1762652579.620773", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1200k_fineweb", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15809607397261488 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29409841468035297 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10762965425531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/c8e1bfa5-d1dc-4bcb-9b91-397302006b1d.json 
b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/c8e1bfa5-d1dc-4bcb-9b91-397302006b1d.json deleted file mode 100644 index 8f23adca06217fc6017d55c116170176ff996f1d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/c8e1bfa5-d1dc-4bcb-9b91-397302006b1d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/1762652579.6209762", - "retrieved_timestamp": "1762652579.620977", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.157771379938563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29496212100634955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36999999999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11394614361702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/d4dabe47-4bc9-46fe-8c2d-206d5ed8874a.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/d4dabe47-4bc9-46fe-8c2d-206d5ed8874a.json deleted file mode 100644 index 2afcb49f46abe1dd57403b06f50d4a90d7868193..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/d4dabe47-4bc9-46fe-8c2d-206d5ed8874a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/1762652579.6211882", - "retrieved_timestamp": "1762652579.6211882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15847063569107744 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29604672415652145 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11643949468085106 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb/c5cb1709-7ba4-438c-8af7-d96cb4ab4ad0.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb/c5cb1709-7ba4-438c-8af7-d96cb4ab4ad0.json deleted file mode 100644 index 47ea25b72e9557fb9a8a40adb00c0681ee5e42b7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb/c5cb1709-7ba4-438c-8af7-d96cb4ab4ad0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1400k_fineweb/1762652579.6213892", - "retrieved_timestamp": "1762652579.62139", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1400k_fineweb", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17638089158987041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2921781950918249 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1079621010638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/75cbe3a2-cbfa-482b-8c35-b74caf046df8.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/75cbe3a2-cbfa-482b-8c35-b74caf046df8.json deleted file mode 100644 index 842e04193008e57fe2e5c6479d90a8a47babe2e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/75cbe3a2-cbfa-482b-8c35-b74caf046df8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/1762652579.621598", - "retrieved_timestamp": "1762652579.621599", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed", - 
"developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17066051410258115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2992388897714206 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3939375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11045545212765957 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/062fa044-0fd4-49ea-988d-f477c7930496.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/062fa044-0fd4-49ea-988d-f477c7930496.json deleted file mode 100644 index 8cd9a2adf4e6a285bcc79c62adeac063c3e974f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/062fa044-0fd4-49ea-988d-f477c7930496.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/1762652579.621813", - "retrieved_timestamp": "1762652579.621814", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.15384956360235286 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.291672957517483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37406249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11369680851063829 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/af001f63-a060-49ec-9bd3-f06b2ad96dc8.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/af001f63-a060-49ec-9bd3-f06b2ad96dc8.json deleted file mode 100644 index 4f70179c1aa09809cacf1f4ed9d2ed113f2fc059..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/af001f63-a060-49ec-9bd3-f06b2ad96dc8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/1762652579.622025", - "retrieved_timestamp": "1762652579.622026", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14747979804695985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30287372123209483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35784375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11195146276595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_selected/556e1124-135e-473f-9e62-852f095b3118.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_selected/556e1124-135e-473f-9e62-852f095b3118.json deleted file mode 100644 index b6e838c0397d801dfeee07a4d51035720df093d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_selected/556e1124-135e-473f-9e62-852f095b3118.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_selected/1762652579.622248", - "retrieved_timestamp": "1762652579.622248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13451530827094332 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2927186496606003 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11311502659574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb/982d6727-aa6c-41fe-abe7-47811ad3c9da.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb/982d6727-aa6c-41fe-abe7-47811ad3c9da.json deleted file mode 100644 index 2cf1def1ef6cf2b2c605ea1417b99033285c4597..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb/982d6727-aa6c-41fe-abe7-47811ad3c9da.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_400k_fineweb/1762652579.62247", - "retrieved_timestamp": "1762652579.62247", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_400k_fineweb", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1511267880335288 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29723404576965046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.11627327127659574 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/7b8f532b-c3a5-48fe-9d3f-e9c8b6f6897d.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/7b8f532b-c3a5-48fe-9d3f-e9c8b6f6897d.json deleted file mode 100644 index d17ad513fada11815807b510af6061e1a010bf4d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/7b8f532b-c3a5-48fe-9d3f-e9c8b6f6897d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/1762652579.622689", - "retrieved_timestamp": "1762652579.62269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.155648124753432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3048804422828362 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38599999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11377992021276596 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1ce9e40f-5613-4d95-b451-a34f3feb961e.json 
b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1ce9e40f-5613-4d95-b451-a34f3feb961e.json deleted file mode 100644 index db63b0194d5d1c02b15f7019e255e105d586260d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1ce9e40f-5613-4d95-b451-a34f3feb961e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1762652579.62291", - "retrieved_timestamp": "1762652579.622911", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15842076800666677 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2925171720555518 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38199999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1157746010638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb/bf6d3042-aa42-45b5-8bb1-49a8c5e2fd50.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb/bf6d3042-aa42-45b5-8bb1-49a8c5e2fd50.json deleted file mode 100644 index 3e1fc5d93b905c96085947364e50df49f9dd4ffc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb/bf6d3042-aa42-45b5-8bb1-49a8c5e2fd50.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_600k_fineweb/1762652579.623165", - "retrieved_timestamp": "1762652579.6231658", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_600k_fineweb", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16391618682872555 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3013718229200533 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38085416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11261635638297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/4446e0a4-abdc-48a4-83f7-cc3d4aeede78.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/4446e0a4-abdc-48a4-83f7-cc3d4aeede78.json deleted file mode 100644 index e24b2382d0ae87ff1d159a009bfe1d8a01d62b13..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/4446e0a4-abdc-48a4-83f7-cc3d4aeede78.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/1762652579.623383", - "retrieved_timestamp": "1762652579.623384", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16414114549395603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30001678726257036 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_selected/52f63809-1390-4a66-8ae2-8f150425d2d9.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_selected/52f63809-1390-4a66-8ae2-8f150425d2d9.json deleted file mode 100644 index 21689ed6598be421af00f16fedc67a12fe17150a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_selected/52f63809-1390-4a66-8ae2-8f150425d2d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_selected/1762652579.623598", - "retrieved_timestamp": "1762652579.623599", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16059389087620846 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2983444769655102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3846354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11619015957446809 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb/6b7b5025-01c0-470b-8856-b628b11f4e6c.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb/6b7b5025-01c0-470b-8856-b628b11f4e6c.json deleted file mode 100644 index 3495f58cc736e411438967aa5f53ba9e4031469a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb/6b7b5025-01c0-470b-8856-b628b11f4e6c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_800k_fineweb/1762652579.623817", - "retrieved_timestamp": "1762652579.623818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_800k_fineweb", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16414114549395603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29594449748780255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.370125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11519281914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/b85e5d55-dbdd-4383-ac86-75c83648c522.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/b85e5d55-dbdd-4383-ac86-75c83648c522.json deleted file mode 100644 index 4d2efbda236dccea1241926f133196ab1951e2a0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/b85e5d55-dbdd-4383-ac86-75c83648c522.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/1762652579.62404", - "retrieved_timestamp": "1762652579.6240408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1622927166584662 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3038096660271284 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3992708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11377992021276596 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_selected/dcddcf2f-f3fe-4f45-8c42-e95b1ac99d88.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_selected/dcddcf2f-f3fe-4f45-8c42-e95b1ac99d88.json deleted file mode 100644 index 052ad6c7aab8af7e1ea3d3f727775ec7ada6bb8a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_selected/dcddcf2f-f3fe-4f45-8c42-e95b1ac99d88.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_selected/1762652579.624255", - "retrieved_timestamp": "1762652579.624256", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14742993036254914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2942808065535252 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11303191489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2_pretrained_200k_fineweb/3d10ce78-6474-48c0-8eb3-c5b7146d3e06.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2_pretrained_200k_fineweb/3d10ce78-6474-48c0-8eb3-c5b7146d3e06.json deleted file mode 100644 index f7bd1b3ffcd6327d3c6e24ee2f44a8a320154011..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2_pretrained_200k_fineweb/3d10ce78-6474-48c0-8eb3-c5b7146d3e06.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_smollm2_pretrained_200k_fineweb/1762652579.624471", - "retrieved_timestamp": "1762652579.624471", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/smollm2_pretrained_200k_fineweb", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/smollm2_pretrained_200k_fineweb" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15270039051937748 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.299468427221449 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11594082446808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/41e2bd81-2369-416a-9287-021872efd931.json 
b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/41e2bd81-2369-416a-9287-021872efd931.json deleted file mode 100644 index c07e5af67aae85dbe8e7afa640ad7c171c2d1432..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/41e2bd81-2369-416a-9287-021872efd931.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FlofloB_test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/1762652579.6246889", - "retrieved_timestamp": "1762652579.6246898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", - "developer": "FlofloB", - "inference_platform": "unknown", - "id": "FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.521546164177715 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5240829189778252 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42441666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3720910904255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 16.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FuJhen/FuJhen_ft-openhermes-25-mistral-7b-irca-dpo-pairs/bfaec047-518f-42a0-93a1-c6bda3589c26.json b/leaderboard_data/HFOpenLLMv2/FuJhen/FuJhen_ft-openhermes-25-mistral-7b-irca-dpo-pairs/bfaec047-518f-42a0-93a1-c6bda3589c26.json deleted file mode 100644 index c4c2972c9ab9de2c30c18030400c59fd3a28de76..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/FuJhen/FuJhen_ft-openhermes-25-mistral-7b-irca-dpo-pairs/bfaec047-518f-42a0-93a1-c6bda3589c26.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FuJhen_ft-openhermes-25-mistral-7b-irca-dpo-pairs/1762652579.624908", - "retrieved_timestamp": "1762652579.6249092", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs", - "developer": "FuJhen", - "inference_platform": "unknown", - "id": "FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5420041046645123 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47730323895548116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2956283244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 14.483 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FuJhen/FuJhen_mistral-instruct-7B-DPO/5f79d177-3ca8-4c95-83bb-2abb0e803e72.json b/leaderboard_data/HFOpenLLMv2/FuJhen/FuJhen_mistral-instruct-7B-DPO/5f79d177-3ca8-4c95-83bb-2abb0e803e72.json deleted file mode 100644 index efbdfe34b29635735b4a6f5304293d5a62e166b7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FuJhen/FuJhen_mistral-instruct-7B-DPO/5f79d177-3ca8-4c95-83bb-2abb0e803e72.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FuJhen_mistral-instruct-7B-DPO/1762652579.625171", - "retrieved_timestamp": "1762652579.625172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": 
{ - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FuJhen/mistral-instruct-7B-DPO", - "developer": "FuJhen", - "inference_platform": "unknown", - "id": "FuJhen/mistral-instruct-7B-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49684171332065585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46239050561386214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4015625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30335771276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 14.496 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-7B-v2.0/26ca0085-db25-4664-823a-f56e08081dc4.json b/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-7B-v2.0/26ca0085-db25-4664-823a-f56e08081dc4.json deleted file mode 100644 index 4eec917d1d1f5f3e45ee3c383c0a26b9223c7b0c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-7B-v2.0/26ca0085-db25-4664-823a-f56e08081dc4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-7B-v2.0/1762652579.625878", - "retrieved_timestamp": "1762652579.625879", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FuseAI/FuseChat-7B-v2.0", - "developer": "FuseAI", - "inference_platform": "unknown", - "id": "FuseAI/FuseChat-7B-v2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3423194900641409 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4954212795868764 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4796666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162400265957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Llama-3.1-8B-Instruct/fdc9ea4d-acf8-4f2c-b727-482f464eb925.json b/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Llama-3.1-8B-Instruct/fdc9ea4d-acf8-4f2c-b727-482f464eb925.json deleted file mode 100644 index b13fd775cf5b39aea093df08b73c5d9e4ccf75cd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Llama-3.1-8B-Instruct/fdc9ea4d-acf8-4f2c-b727-482f464eb925.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-Llama-3.1-8B-Instruct/1762652579.626143", - "retrieved_timestamp": "1762652579.626144", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FuseAI/FuseChat-Llama-3.1-8B-Instruct", - "developer": "FuseAI", - "inference_platform": "unknown", - "id": "FuseAI/FuseChat-Llama-3.1-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7204816553411615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5119887898349903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24773413897280966 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38200000000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37333776595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Llama-3.2-3B-Instruct/e39160a3-8332-467d-900f-52bb7d1446c1.json b/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Llama-3.2-3B-Instruct/e39160a3-8332-467d-900f-52bb7d1446c1.json deleted file mode 100644 index 5f3629f26440d69425f4e727aea4a560b2e5ddb1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Llama-3.2-3B-Instruct/e39160a3-8332-467d-900f-52bb7d1446c1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-Llama-3.2-3B-Instruct/1762652579.626356", - "retrieved_timestamp": "1762652579.626357", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FuseAI/FuseChat-Llama-3.2-3B-Instruct", - "developer": "FuseAI", - "inference_platform": "unknown", - "id": "FuseAI/FuseChat-Llama-3.2-3B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.684886102208806 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46583679221755164 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39139583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31316489361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Qwen-2.5-7B-Instruct/1bae6b5e-47b0-4fe2-847a-8aec0a36342e.json b/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Qwen-2.5-7B-Instruct/1bae6b5e-47b0-4fe2-847a-8aec0a36342e.json deleted file mode 100644 index 85c5c73dbb6e4fc92f67bc66025a96f6ae54bad1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Qwen-2.5-7B-Instruct/1bae6b5e-47b0-4fe2-847a-8aec0a36342e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-Qwen-2.5-7B-Instruct/1762652579.626579", - "retrieved_timestamp": "1762652579.626579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FuseAI/FuseChat-Qwen-2.5-7B-Instruct", - "developer": "FuseAI", - "inference_platform": "unknown", - "id": "FuseAI/FuseChat-Qwen-2.5-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5905641475728844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.552599883615556 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4561933534743202 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41181848404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/GalrionSoftworks/GalrionSoftworks_MN-LooseCannon-12B-v1/eb76e049-3a5d-4786-9724-800b719a6113.json b/leaderboard_data/HFOpenLLMv2/GalrionSoftworks/GalrionSoftworks_MN-LooseCannon-12B-v1/eb76e049-3a5d-4786-9724-800b719a6113.json deleted file mode 100644 index 
84e651f7a495168528f7f35b30de71e94e8ec84d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/GalrionSoftworks/GalrionSoftworks_MN-LooseCannon-12B-v1/eb76e049-3a5d-4786-9724-800b719a6113.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/GalrionSoftworks_MN-LooseCannon-12B-v1/1762652579.626794", - "retrieved_timestamp": "1762652579.626794", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "GalrionSoftworks/MN-LooseCannon-12B-v1", - "developer": "GalrionSoftworks", - "inference_platform": "unknown", - "id": "GalrionSoftworks/MN-LooseCannon-12B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5417791459992819 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5128183808679557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3195644946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/GalrionSoftworks/GalrionSoftworks_MagnusIntellectus-12B-v1/99a948ab-cc5b-4f3a-aae0-684cbfb6ffb3.json b/leaderboard_data/HFOpenLLMv2/GalrionSoftworks/GalrionSoftworks_MagnusIntellectus-12B-v1/99a948ab-cc5b-4f3a-aae0-684cbfb6ffb3.json deleted file mode 100644 index a31127a1ad2b0f5360ff5869f9e5674565472a55..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/GalrionSoftworks/GalrionSoftworks_MagnusIntellectus-12B-v1/99a948ab-cc5b-4f3a-aae0-684cbfb6ffb3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/GalrionSoftworks_MagnusIntellectus-12B-v1/1762652579.62705", - "retrieved_timestamp": "1762652579.627051", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "GalrionSoftworks/MagnusIntellectus-12B-v1", - "developer": "GalrionSoftworks", - "inference_platform": "unknown", - "id": "GalrionSoftworks/MagnusIntellectus-12B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4421368635221213 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5323010476246133 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4428020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34208776595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/GoToCompany/GoToCompany_gemma2-9b-cpt-sahabatai-v1-instruct/68ff0a5c-9e76-410b-a4e3-4b7de0e7fe35.json b/leaderboard_data/HFOpenLLMv2/GoToCompany/GoToCompany_gemma2-9b-cpt-sahabatai-v1-instruct/68ff0a5c-9e76-410b-a4e3-4b7de0e7fe35.json deleted file mode 100644 index 562320e4ad7c8bded4a52ce3633fa45aa0a3566a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/GoToCompany/GoToCompany_gemma2-9b-cpt-sahabatai-v1-instruct/68ff0a5c-9e76-410b-a4e3-4b7de0e7fe35.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/GoToCompany_gemma2-9b-cpt-sahabatai-v1-instruct/1762652579.628178", - "retrieved_timestamp": "1762652579.628178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct", - "developer": "GoToCompany", - "inference_platform": "unknown", - "id": 
"GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6550607942481504 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5954551751157878 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4778645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4263630319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/GoToCompany/GoToCompany_llama3-8b-cpt-sahabatai-v1-instruct/aa363693-a300-4545-b7f3-05492646c202.json b/leaderboard_data/HFOpenLLMv2/GoToCompany/GoToCompany_llama3-8b-cpt-sahabatai-v1-instruct/aa363693-a300-4545-b7f3-05492646c202.json deleted file mode 100644 index 3d9a9493e2b219bbd8a54ce589a4276beb646053..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/GoToCompany/GoToCompany_llama3-8b-cpt-sahabatai-v1-instruct/aa363693-a300-4545-b7f3-05492646c202.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/GoToCompany_llama3-8b-cpt-sahabatai-v1-instruct/1762652579.628486", - "retrieved_timestamp": "1762652579.628489", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct", - "developer": "GoToCompany", - "inference_platform": "unknown", - "id": "GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.523844510343666 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4951292004509417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44884375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3453291223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1b9a4b84-1766-49ca-bd11-17a2340b9736.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1b9a4b84-1766-49ca-bd11-17a2340b9736.json deleted file mode 100644 index 9bc0ceeacad943232fae7d894d851ef3c7551917..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1b9a4b84-1766-49ca-bd11-17a2340b9736.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1762652579.6293938", - "retrieved_timestamp": "1762652579.629396", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3416944817528602 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32921013057720044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.002265861027190332 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3249166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16381316489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/235adbd2-8128-4428-af57-8d8e310ba56f.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/235adbd2-8128-4428-af57-8d8e310ba56f.json deleted file mode 100644 index 055a032a4dc2b25168db9406e574d2478162cd00..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/235adbd2-8128-4428-af57-8d8e310ba56f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1762652579.629041", - "retrieved_timestamp": "1762652579.629042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.347189900574919 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32683063456958195 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16414561170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/a82acc9c-4093-4e0d-a862-7d6eb3cb7146.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/a82acc9c-4093-4e0d-a862-7d6eb3cb7146.json deleted file mode 100644 index 7615c63553a3c8143ead5891e01816d48e26fc81..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/a82acc9c-4093-4e0d-a862-7d6eb3cb7146.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/1762652579.629639", - "retrieved_timestamp": "1762652579.6296399", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47685806992114255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.418600731531926 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27825797872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/baae7cee-8b76-456f-96dc-5ac900a9a36e.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/baae7cee-8b76-456f-96dc-5ac900a9a36e.json deleted file mode 100644 index 57edbee283ea99378232e38e56948f84a04bd94f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/baae7cee-8b76-456f-96dc-5ac900a9a36e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/1762652579.629877", - "retrieved_timestamp": "1762652579.629878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.421553699738915 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40418921704436744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37685416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25615026595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/9363a90d-6ec7-4de2-af17-a3e3e25de7d9.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/9363a90d-6ec7-4de2-af17-a3e3e25de7d9.json deleted file mode 100644 index b1f8629d0e10d31e0d383e0b9940f445ae91d80d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/9363a90d-6ec7-4de2-af17-a3e3e25de7d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/1762652579.630181", - "retrieved_timestamp": "1762652579.6301818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42525055740989465 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4053446177133173 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37018749999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25556848404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/af440c67-78de-4053-98d8-8cded9657860.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/af440c67-78de-4053-98d8-8cded9657860.json deleted file mode 100644 index 
1d5b274dfddd02762faacdcd25a011ceb251ddb9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/af440c67-78de-4053-98d8-8cded9657860.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/1762652579.6304152", - "retrieved_timestamp": "1762652579.630416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8291666112581284 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6355637424320617 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5422960725075529 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5018284574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/9c443687-99df-4cd9-8e19-d40cd83b30bc.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/9c443687-99df-4cd9-8e19-d40cd83b30bc.json deleted file mode 100644 index a19e25a032081d591df4268a94210ed485a2ecaf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/9c443687-99df-4cd9-8e19-d40cd83b30bc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/1762652579.630644", - "retrieved_timestamp": "1762652579.630645", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7813811797142693 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5309672164610734 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43539583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4119847074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/b6bf7c36-006c-4256-a315-1de70e2540c3.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/b6bf7c36-006c-4256-a315-1de70e2540c3.json deleted file mode 100644 index 2714c553c725a848c14c6856838e4381b78fcb33..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/b6bf7c36-006c-4256-a315-1de70e2540c3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/1762652579.631213", - "retrieved_timestamp": "1762652579.631215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41883092417009093 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41242101633634826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2554853723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-3b-v6.0/89947a58-5e39-468e-bbbc-2f3556a1c8f1.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-3b-v6.0/89947a58-5e39-468e-bbbc-2f3556a1c8f1.json deleted file mode 100644 index 3546b84c26ad4a8ffda166bcc266e760b6ffae15..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-3b-v6.0/89947a58-5e39-468e-bbbc-2f3556a1c8f1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-3b-v6.0/1762652579.631514", - "retrieved_timestamp": "1762652579.6315148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/josie-3b-v6.0", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/josie-3b-v6.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.6009554648333089 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4496147842264783 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2938066465256798 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.386125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32197473404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/7c2cc003-fab3-4fc9-a6b6-fb7075261e50.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/7c2cc003-fab3-4fc9-a6b6-fb7075261e50.json deleted file mode 100644 index 5c185a0a0df65161af538c76ce7b2ad37cada907..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/7c2cc003-fab3-4fc9-a6b6-fb7075261e50.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/1762652579.6322381", - "retrieved_timestamp": "1762652579.632239", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7597740661444966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.510712680636641 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.42371601208459214 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45393750000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4011801861702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/90d4e4e1-2185-4d21-8730-f1a4bf413157.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/90d4e4e1-2185-4d21-8730-f1a4bf413157.json deleted file mode 100644 index 4d912cb62ac0631fd05c78a282278b8e8fffabf5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/90d4e4e1-2185-4d21-8730-f1a4bf413157.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/1762652579.632", - "retrieved_timestamp": "1762652579.632001", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7627716680629618 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097811950503962 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.45793750000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40325797872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0/aa158f5d-94a5-4f40-8a65-87fe9605abc1.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0/aa158f5d-94a5-4f40-8a65-87fe9605abc1.json deleted file mode 100644 index e7aa100ccd96013ee990d8f1af8485242713abce..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0/aa158f5d-94a5-4f40-8a65-87fe9605abc1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-7b-v6.0/1762652579.631763", - "retrieved_timestamp": "1762652579.631764", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Goekdeniz-Guelmez/josie-7b-v6.0", - "developer": "Goekdeniz-Guelmez", - "inference_platform": "unknown", - "id": "Goekdeniz-Guelmez/josie-7b-v6.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7411645544931892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5104855208094123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41539583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806515957446808 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/GreenNode/GreenNode_GreenNode-small-9B-it/d13def83-5ff8-4cde-aef5-b3c268c40c16.json b/leaderboard_data/HFOpenLLMv2/GreenNode/GreenNode_GreenNode-small-9B-it/d13def83-5ff8-4cde-aef5-b3c268c40c16.json deleted file mode 100644 index 61f9284c1007b8df21f6c1e4ad1395ec95bf83e7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/GreenNode/GreenNode_GreenNode-small-9B-it/d13def83-5ff8-4cde-aef5-b3c268c40c16.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/GreenNode_GreenNode-small-9B-it/1762652579.6324449", - "retrieved_timestamp": "1762652579.632446", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "GreenNode/GreenNode-small-9B-it", - "developer": "GreenNode", - "inference_platform": "unknown", - "id": "GreenNode/GreenNode-small-9B-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7436125037123721 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.599383874005197 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17447129909365558 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42041666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3927027925531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/GritLM/GritLM_GritLM-7B-KTO/6d7f26d7-2336-4def-9d17-09d30a89e02d.json b/leaderboard_data/HFOpenLLMv2/GritLM/GritLM_GritLM-7B-KTO/6d7f26d7-2336-4def-9d17-09d30a89e02d.json deleted file mode 100644 index e9bc8277c182871d42e52831dbb00d096233b4d3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/GritLM/GritLM_GritLM-7B-KTO/6d7f26d7-2336-4def-9d17-09d30a89e02d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/GritLM_GritLM-7B-KTO/1762652579.632807", - "retrieved_timestamp": 
"1762652579.632808", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "GritLM/GritLM-7B-KTO", - "developer": "GritLM", - "inference_platform": "unknown", - "id": "GritLM/GritLM-7B-KTO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5310132670203948 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.485293719684692 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37102083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26803523936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/GritLM/GritLM_GritLM-8x7B-KTO/de98eb82-0606-46b8-bbfb-d054a0f6ef2c.json b/leaderboard_data/HFOpenLLMv2/GritLM/GritLM_GritLM-8x7B-KTO/de98eb82-0606-46b8-bbfb-d054a0f6ef2c.json deleted file mode 100644 index fb1384089ea721c04d894bd48cdd83b3310f7d43..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/GritLM/GritLM_GritLM-8x7B-KTO/de98eb82-0606-46b8-bbfb-d054a0f6ef2c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/GritLM_GritLM-8x7B-KTO/1762652579.633089", - "retrieved_timestamp": "1762652579.633089", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "GritLM/GritLM-8x7B-KTO", - "developer": "GritLM", - "inference_platform": "unknown", - "id": "GritLM/GritLM-8x7B-KTO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5714049832222946 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5820304362331497 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42165625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36477726063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.5-12b-Nemo/f9ed0b0f-6fa9-4450-97fe-204f6dc8d88a.json b/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.5-12b-Nemo/f9ed0b0f-6fa9-4450-97fe-204f6dc8d88a.json deleted file mode 100644 index 89ae80fb263a4200a718de2cc27b6b7e381e4f66..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.5-12b-Nemo/f9ed0b0f-6fa9-4450-97fe-204f6dc8d88a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.5-12b-Nemo/1762652579.633812", - "retrieved_timestamp": "1762652579.633813", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Gryphe/Pantheon-RP-1.5-12b-Nemo", - "developer": "Gryphe", - "inference_platform": "unknown", - "id": "Gryphe/Pantheon-RP-1.5-12b-Nemo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47630841722186024 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519582216884963 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44203125000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3302027925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.6-12b-Nemo-KTO/a2445d2d-b8a2-44e4-9c74-7401e7afde75.json b/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.6-12b-Nemo-KTO/a2445d2d-b8a2-44e4-9c74-7401e7afde75.json deleted file mode 100644 index 1b71efd901cda7f0acfa82a62819b1501c7e23d2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.6-12b-Nemo-KTO/a2445d2d-b8a2-44e4-9c74-7401e7afde75.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.6-12b-Nemo-KTO/1762652579.634284", - "retrieved_timestamp": "1762652579.634285", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO", - "developer": "Gryphe", - "inference_platform": "unknown", - "id": "Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4636187537954849 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5276980814125921 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4247916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33818151595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.6-12b-Nemo/9a2ca2e5-a2e9-460f-b4dc-a6293ca13003.json b/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.6-12b-Nemo/9a2ca2e5-a2e9-460f-b4dc-a6293ca13003.json deleted file mode 100644 index bd1320b2b1bad1173c137e8e3de7d8aeae58c605..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.6-12b-Nemo/9a2ca2e5-a2e9-460f-b4dc-a6293ca13003.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.6-12b-Nemo/1762652579.634059", - "retrieved_timestamp": "1762652579.6340601", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Gryphe/Pantheon-RP-1.6-12b-Nemo", - "developer": "Gryphe", - "inference_platform": "unknown", - "id": "Gryphe/Pantheon-RP-1.6-12b-Nemo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44805671174705336 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5204007434392454 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4287604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33111702127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-Pure-1.6.2-22b-Small/f5f73aa0-2223-49c0-a2ad-df38ee33355b.json 
b/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-Pure-1.6.2-22b-Small/f5f73aa0-2223-49c0-a2ad-df38ee33355b.json deleted file mode 100644 index 8d4e7e18b7e323bc61c7ea597db1f9d33b143707..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-Pure-1.6.2-22b-Small/f5f73aa0-2223-49c0-a2ad-df38ee33355b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-Pure-1.6.2-22b-Small/1762652579.6344929", - "retrieved_timestamp": "1762652579.6344929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small", - "developer": "Gryphe", - "inference_platform": "unknown", - "id": "Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6931042965996888 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5304537230538597 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37647916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39419880319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/GuilhermeNaturaUmana/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/5aa1bdc6-4b8f-411f-9150-41217a94ec5e.json b/leaderboard_data/HFOpenLLMv2/GuilhermeNaturaUmana/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/5aa1bdc6-4b8f-411f-9150-41217a94ec5e.json deleted file mode 100644 index e0c5e6e207085b136a481845472bc38ce81d63c6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/GuilhermeNaturaUmana/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/5aa1bdc6-4b8f-411f-9150-41217a94ec5e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/1762652579.63471", - "retrieved_timestamp": "1762652579.634711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall", - "developer": "GuilhermeNaturaUmana", - "inference_platform": "unknown", - "id": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4985405391029136 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5644838945274894 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25755287009063443 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44290226063829785 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/GuilhermeNaturaUmana/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/9ddf874c-16a9-4f66-a3c5-140f10bc4787.json b/leaderboard_data/HFOpenLLMv2/GuilhermeNaturaUmana/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/9ddf874c-16a9-4f66-a3c5-140f10bc4787.json deleted file mode 100644 index 933bf263cce590fd9d86b8c1862b75bec34e3ea9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/GuilhermeNaturaUmana/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/9ddf874c-16a9-4f66-a3c5-140f10bc4787.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/1762652579.634963", - "retrieved_timestamp": "1762652579.634964", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall", - "developer": "GuilhermeNaturaUmana", - "inference_platform": "unknown", - "id": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47910654840268263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5648715950622487 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4439166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4408244680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HarbingerX/HarbingerX_Zeitgeist-3b-V1.2/37dad0cc-36d1-4a4c-8d9c-0f5246889a0c.json b/leaderboard_data/HFOpenLLMv2/HarbingerX/HarbingerX_Zeitgeist-3b-V1.2/37dad0cc-36d1-4a4c-8d9c-0f5246889a0c.json deleted file mode 100644 index c13f5a5cb0c0e0c78c59efba4334427ae203af6e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HarbingerX/HarbingerX_Zeitgeist-3b-V1.2/37dad0cc-36d1-4a4c-8d9c-0f5246889a0c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HarbingerX_Zeitgeist-3b-V1.2/1762652579.6374269", - "retrieved_timestamp": "1762652579.637428", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HarbingerX/Zeitgeist-3b-V1.2", - "developer": "HarbingerX", - "inference_platform": "unknown", - "id": "HarbingerX/Zeitgeist-3b-V1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6754189993661264 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4440650477102142 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35790625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30560172872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HarbingerX/HarbingerX_Zeitgeist-3b-V1/3bc34460-661d-404b-bb1c-5b2fe395b897.json b/leaderboard_data/HFOpenLLMv2/HarbingerX/HarbingerX_Zeitgeist-3b-V1/3bc34460-661d-404b-bb1c-5b2fe395b897.json deleted file mode 100644 index 7eaa7ffd8d39cccef6a9de835e078deb6a3bf97c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HarbingerX/HarbingerX_Zeitgeist-3b-V1/3bc34460-661d-404b-bb1c-5b2fe395b897.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HarbingerX_Zeitgeist-3b-V1/1762652579.637166", - "retrieved_timestamp": "1762652579.6371672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HarbingerX/Zeitgeist-3b-V1", - "developer": "HarbingerX", - "inference_platform": "unknown", - "id": "HarbingerX/Zeitgeist-3b-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6711724889958643 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4440790761237121 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3579375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3009474734042553 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Hastagaras/Hastagaras_L3.2-JametMini-3B-MK.III/cf208ef7-8a9b-4633-8161-dae0825c380e.json b/leaderboard_data/HFOpenLLMv2/Hastagaras/Hastagaras_L3.2-JametMini-3B-MK.III/cf208ef7-8a9b-4633-8161-dae0825c380e.json deleted file mode 100644 index eaaad28c8a8616666ba5b09d368ad9f929756c27..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Hastagaras/Hastagaras_L3.2-JametMini-3B-MK.III/cf208ef7-8a9b-4633-8161-dae0825c380e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Hastagaras_L3.2-JametMini-3B-MK.III/1762652579.6376362", - "retrieved_timestamp": "1762652579.6376371", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Hastagaras/L3.2-JametMini-3B-MK.III", - "developer": "Hastagaras", - "inference_platform": "unknown", - "id": "Hastagaras/L3.2-JametMini-3B-MK.III" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6182662003484088 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45385245294894094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.2982878989361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Cipher-20B/21f72176-cf3b-43ae-aa6e-51d9fe5a6e90.json b/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Cipher-20B/21f72176-cf3b-43ae-aa6e-51d9fe5a6e90.json deleted file mode 100644 index 7695ed0f03390b91dfcdfdc6f800c26b9f336bd2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Cipher-20B/21f72176-cf3b-43ae-aa6e-51d9fe5a6e90.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HelpingAI_Cipher-20B/1762652579.638349", - "retrieved_timestamp": "1762652579.63835", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HelpingAI/Cipher-20B", - "developer": "HelpingAI", - "inference_platform": "unknown", - "id": "HelpingAI/Cipher-20B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5377575942942504 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6032432743536918 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19939577039274925 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40029166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3744182180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 20.551 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Dhanishtha-Large/e097ccca-ab91-4f16-bbfa-ca97c91fdb77.json b/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Dhanishtha-Large/e097ccca-ab91-4f16-bbfa-ca97c91fdb77.json deleted file mode 100644 index af7d5be7d050742757864777f2920fa084d2601e..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Dhanishtha-Large/e097ccca-ab91-4f16-bbfa-ca97c91fdb77.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HelpingAI_Dhanishtha-Large/1762652579.638597", - "retrieved_timestamp": "1762652579.638598", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HelpingAI/Dhanishtha-Large", - "developer": "HelpingAI", - "inference_platform": "unknown", - "id": "HelpingAI/Dhanishtha-Large" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24567370133468086 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46036539145861094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38451041666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2755152925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Priya-10B/94aca944-b0a9-46ec-bdab-53bb5cbe3b78.json b/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Priya-10B/94aca944-b0a9-46ec-bdab-53bb5cbe3b78.json deleted file mode 100644 index a2bdd995193b81175dd6eb7e674f056663ba1579..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Priya-10B/94aca944-b0a9-46ec-bdab-53bb5cbe3b78.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HelpingAI_Priya-10B/1762652579.638817", - "retrieved_timestamp": "1762652579.638818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - 
"model_info": { - "name": "HelpingAI/Priya-10B", - "developer": "HelpingAI", - "inference_platform": "unknown", - "id": "HelpingAI/Priya-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40429283190822574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4441457310476767 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3792708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24925199468085107 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.211 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Priya-3B/f709afd7-3220-41b0-909a-74d9086c7dd9.json b/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Priya-3B/f709afd7-3220-41b0-909a-74d9086c7dd9.json deleted file mode 100644 index 491e056ff9e4ad4e3db158404e5224b1f62419a0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Priya-3B/f709afd7-3220-41b0-909a-74d9086c7dd9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HelpingAI_Priya-3B/1762652579.639023", - "retrieved_timestamp": "1762652579.639024", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HelpingAI/Priya-3B", - "developer": "HelpingAI", - "inference_platform": "unknown", - "id": "HelpingAI/Priya-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4525780484669566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3961184863327844 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23387632978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.81 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-7b-alpha/2029aa96-40b2-4af8-a7fa-8ae968b20502.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-7b-alpha/2029aa96-40b2-4af8-a7fa-8ae968b20502.json deleted file mode 100644 index 58383a59a037d9f37c4e9cdda9d3f39ccffbb99b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-7b-alpha/2029aa96-40b2-4af8-a7fa-8ae968b20502.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-7b-alpha/1762652579.640769", - "retrieved_timestamp": "1762652579.64077", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceH4/zephyr-7b-alpha", - "developer": "HuggingFaceH4", - "inference_platform": "unknown", - "id": "HuggingFaceH4/zephyr-7b-alpha" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5191480826429429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45828635059044115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3949583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2795046542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-7b-beta/3b9d5166-4144-4222-a39d-3d1d3956a6e8.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-7b-beta/3b9d5166-4144-4222-a39d-3d1d3956a6e8.json deleted file mode 100644 index 737d808d10d243c3e699bdc382a4ba8ddecd2f44..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-7b-beta/3b9d5166-4144-4222-a39d-3d1d3956a6e8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-7b-beta/1762652579.641025", - "retrieved_timestamp": "1762652579.641026", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceH4/zephyr-7b-beta", - "developer": "HuggingFaceH4", - "inference_platform": "unknown", - "id": "HuggingFaceH4/zephyr-7b-beta" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49504315216957673 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.431582191918003 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3925416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2780917553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-orpo-141b-A35b-v0.1/8b347bb4-9f6d-4c82-bd5d-2fb5f7c8f881.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-orpo-141b-A35b-v0.1/8b347bb4-9f6d-4c82-bd5d-2fb5f7c8f881.json deleted file mode 100644 index f2336f38bfd558226cbbd151ba5e1be134eb0ea3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-orpo-141b-A35b-v0.1/8b347bb4-9f6d-4c82-bd5d-2fb5f7c8f881.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-orpo-141b-A35b-v0.1/1762652579.641484", - "retrieved_timestamp": "1762652579.641485", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1", - "developer": "HuggingFaceH4", - "inference_platform": "unknown", - "id": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6510891102275296 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6290439728524093 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20468277945619334 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4465208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4586103723404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 140.621 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-1.7B-Instruct/690a5844-000e-4949-bbf9-8bd1ff2cb1bd.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-1.7B-Instruct/690a5844-000e-4949-bbf9-8bd1ff2cb1bd.json deleted file mode 100644 index d491265c88174a5e752c5e5a4b11d5b4c7aaf322..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-1.7B-Instruct/690a5844-000e-4949-bbf9-8bd1ff2cb1bd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-1.7B-Instruct/1762652579.641991", - "retrieved_timestamp": "1762652579.641991", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM-1.7B-Instruct", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM-1.7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23478259905938464 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28851114363217695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11660571808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.71 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-1.7B/e1b7c18a-bff1-44a3-b589-95bcb0f88e36.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-1.7B/e1b7c18a-bff1-44a3-b589-95bcb0f88e36.json deleted file mode 100644 index 8a24a576a83065e42cca88d230a44a8b4b50a7c4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-1.7B/e1b7c18a-bff1-44a3-b589-95bcb0f88e36.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-1.7B/1762652579.6417458", - "retrieved_timestamp": "1762652579.6417458", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM-1.7B", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM-1.7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23615673080759053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3180516538964782 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34209375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11477726063829788 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.71 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-135M-Instruct/adff7af4-9bae-420a-9751-9f68ab81bf99.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-135M-Instruct/adff7af4-9bae-420a-9751-9f68ab81bf99.json deleted file mode 100644 index a3a8be93fa0185a2cf3a4f9fd6bcd29d7bd8b7c9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-135M-Instruct/adff7af4-9bae-420a-9751-9f68ab81bf99.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-135M-Instruct/1762652579.642397", - "retrieved_timestamp": "1762652579.6423979", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM-135M-Instruct", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM-135M-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12140121544169469 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30150816789978757 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36345833333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11760305851063829 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-135M/8cd60e42-3429-4938-b43e-9c951a57ca9f.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-135M/8cd60e42-3429-4938-b43e-9c951a57ca9f.json deleted file mode 100644 index ef56ece1f0b881b3173582c7988d8fa47f0dcb87..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-135M/8cd60e42-3429-4938-b43e-9c951a57ca9f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-135M/1762652579.642195", - "retrieved_timestamp": "1762652579.642196", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM-135M", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM-135M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21247622973709757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3046054260062988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4366041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11220079787234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.13 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-360M-Instruct/ec13c105-c846-4420-91af-d42e98b7a818.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-360M-Instruct/ec13c105-c846-4420-91af-d42e98b7a818.json deleted file mode 100644 index 5a94fe9cf3b1f20c90732bf4130789bcfb46a12d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-360M-Instruct/ec13c105-c846-4420-91af-d42e98b7a818.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-360M-Instruct/1762652579.642821", - "retrieved_timestamp": "1762652579.642821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM-360M-Instruct", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM-360M-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19516549422199764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28851114363217695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34717708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11660571808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-360M/236f7bdd-be50-4287-82b7-6efddc9dd3f4.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-360M/236f7bdd-be50-4287-82b7-6efddc9dd3f4.json deleted file mode 100644 index d2462466b27565b2fe3fc8df8ac07c4cc8bf0fae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-360M/236f7bdd-be50-4287-82b7-6efddc9dd3f4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-360M/1762652579.642613", - "retrieved_timestamp": "1762652579.6426141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM-360M", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM-360M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2133505764704318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30645160333152527 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40178125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11236702127659574 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.36 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-1.7B-Instruct/09b81183-8ff2-44d5-a515-63cddc3e55c6.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-1.7B-Instruct/09b81183-8ff2-44d5-a515-63cddc3e55c6.json deleted file mode 
100644 index 8aea073920e7e6b357310ae4b934dfe573ffdc74..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-1.7B-Instruct/09b81183-8ff2-44d5-a515-63cddc3e55c6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-1.7B-Instruct/1762652579.643299", - "retrieved_timestamp": "1762652579.6433", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM2-1.7B-Instruct", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-1.7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367835121920947 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3598617531415158 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.342125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2053690159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-1.7B/db57503c-bfe7-4691-983e-68af941e8b1e.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-1.7B/db57503c-bfe7-4691-983e-68af941e8b1e.json deleted file mode 100644 index 6e79543b3c7a7d1fd9a537fa05d433b7ad92fe66..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-1.7B/db57503c-bfe7-4691-983e-68af941e8b1e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-1.7B/1762652579.6430368", - "retrieved_timestamp": "1762652579.643038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open 
LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM2-1.7B", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-1.7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2440003634800108 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3452594377166261 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3485416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2137632978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.71 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M-Instruct/9a9fb17d-49ae-4a82-95c8-c8b55923d72f.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M-Instruct/9a9fb17d-49ae-4a82-95c8-c8b55923d72f.json deleted file mode 100644 index b84ca446f0b7bff66ca275844f9caed9a5c8ae42..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M-Instruct/9a9fb17d-49ae-4a82-95c8-c8b55923d72f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-135M-Instruct/1762652579.644038", - "retrieved_timestamp": "1762652579.644039", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM2-135M-Instruct", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-135M-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.05925167444602544 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31347502947335903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23406040268456377 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10920877659574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M-Instruct/df60b16b-184c-43d9-ac79-8627f09d265b.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M-Instruct/df60b16b-184c-43d9-ac79-8627f09d265b.json deleted file mode 100644 index 3f8103f38b4f00932486995ff22094ac6624f122..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M-Instruct/df60b16b-184c-43d9-ac79-8627f09d265b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-135M-Instruct/1762652579.643796", - "retrieved_timestamp": "1762652579.643796", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM2-135M-Instruct", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-135M-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2883138960181208 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3124321328066677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36621875000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11145279255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M/1761caca-524f-4d59-81dd-631e3e24e0e5.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M/1761caca-524f-4d59-81dd-631e3e24e0e5.json deleted file mode 100644 index 4b67a7664281db0efc7c1fa05577b36ac6c456a7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M/1761caca-524f-4d59-81dd-631e3e24e0e5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-135M/1762652579.643546", - "retrieved_timestamp": "1762652579.6435468", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM2-135M", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-135M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18177657504310785 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3044234246877141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4111770833333333 - } - 
}, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10945811170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M-Instruct/06409b6c-9d26-4bee-af75-16e6edb87a93.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M-Instruct/06409b6c-9d26-4bee-af75-16e6edb87a93.json deleted file mode 100644 index b491ae17e470f57dda6a27bddd2b8094bb0f195e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M-Instruct/06409b6c-9d26-4bee-af75-16e6edb87a93.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-360M-Instruct/1762652579.644474", - "retrieved_timestamp": "1762652579.644475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM2-360M-Instruct", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-360M-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08303191088533979 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3052703401844317 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34228125000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11261635638297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M-Instruct/09ba6e80-5ab4-4c8c-b7ad-c1497413c207.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M-Instruct/09ba6e80-5ab4-4c8c-b7ad-c1497413c207.json deleted file mode 100644 index 8390cf0fe8a3351f3ffa543fea1a30cb96e66c17..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M-Instruct/09ba6e80-5ab4-4c8c-b7ad-c1497413c207.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-360M-Instruct/1762652579.6446972", - "retrieved_timestamp": "1762652579.6446981", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM2-360M-Instruct", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-360M-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38415958545548035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31435050538888504 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.346125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11170212765957446 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.36 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M/7751b65d-2bba-465c-9a1e-5ae51d94fcf6.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M/7751b65d-2bba-465c-9a1e-5ae51d94fcf6.json deleted file mode 100644 index d951d5a809cf2cc4140b2a6e4b4012a2442f0f86..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M/7751b65d-2bba-465c-9a1e-5ae51d94fcf6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/HuggingFaceTB_SmolLM2-360M/1762652579.6442492", - "retrieved_timestamp": "1762652579.6442502", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceTB/SmolLM2-360M", - "developer": "HuggingFaceTB", - "inference_platform": "unknown", - "id": "HuggingFaceTB/SmolLM2-360M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21145227995053123 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3233478044302361 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3954270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11693816489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.36 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-LLama3-8B-Instruct/e69e4e90-8177-44f5-8497-0a45ca9155ea.json b/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-LLama3-8B-Instruct/e69e4e90-8177-44f5-8497-0a45ca9155ea.json deleted file mode 100644 index 4b5a70e0b19dd07b3025ab7aeaf2f838dc33e08c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-LLama3-8B-Instruct/e69e4e90-8177-44f5-8497-0a45ca9155ea.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HumanLLMs_Humanish-LLama3-8B-Instruct/1762652579.6448839", - "retrieved_timestamp": "1762652579.644885", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HumanLLMs/Humanish-LLama3-8B-Instruct", - "developer": "HumanLLMs", - 
"inference_platform": "unknown", - "id": "HumanLLMs/Humanish-LLama3-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6497903340913221 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49677096627896544 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35815624999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37017952127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-Mistral-Nemo-Instruct-2407/de0dbc50-5d26-4005-967c-3dcbde3a1282.json b/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-Mistral-Nemo-Instruct-2407/de0dbc50-5d26-4005-967c-3dcbde3a1282.json deleted file mode 100644 index 1f457a2336abca2d80d31de01f0105ddd9ad0118..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-Mistral-Nemo-Instruct-2407/de0dbc50-5d26-4005-967c-3dcbde3a1282.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HumanLLMs_Humanish-Mistral-Nemo-Instruct-2407/1762652579.6451478", - "retrieved_timestamp": "1762652579.645149", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407", - "developer": "HumanLLMs", - "inference_platform": "unknown", - "id": "HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5451269298793867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5261780772532613 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39676041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35206117021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-Qwen2.5-7B-Instruct/df720663-5e82-4de7-9a19-88287bb5f56a.json b/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-Qwen2.5-7B-Instruct/df720663-5e82-4de7-9a19-88287bb5f56a.json deleted file mode 100644 index 0e791bf417c3c55ac7cde89209a1ff64c326a9de..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-Qwen2.5-7B-Instruct/df720663-5e82-4de7-9a19-88287bb5f56a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HumanLLMs_Humanish-Qwen2.5-7B-Instruct/1762652579.645365", - "retrieved_timestamp": "1762652579.645366", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HumanLLMs/Humanish-Qwen2.5-7B-Instruct", - "developer": "HumanLLMs", - "inference_platform": "unknown", - "id": "HumanLLMs/Humanish-Qwen2.5-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7284250233824031 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5363681457807072 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3980625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4398271276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-1/23b6bf8e-c79a-4620-9e15-2742f45130af.json b/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-1/23b6bf8e-c79a-4620-9e15-2742f45130af.json deleted file mode 100644 index f4372cd850e24401b523058e8ff026e196c3e1b6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-1/23b6bf8e-c79a-4620-9e15-2742f45130af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3-1/1762652579.6473012", - "retrieved_timestamp": "1762652579.647302", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Intel/neural-chat-7b-v3-1", - "developer": "Intel", - "inference_platform": "unknown", - "id": "Intel/neural-chat-7b-v3-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4686897432146704 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5051565464054848 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49789583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2677859042553192 - } - } - ], - 
"additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-2/f8842523-53de-4197-9cf4-979780cbe127.json b/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-2/f8842523-53de-4197-9cf4-979780cbe127.json deleted file mode 100644 index 08e7574a2b012cb1913a4f9c609f7eb1b17c2aae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-2/f8842523-53de-4197-9cf4-979780cbe127.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3-2/1762652579.647583", - "retrieved_timestamp": "1762652579.647584", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Intel/neural-chat-7b-v3-2", - "developer": "Intel", - "inference_platform": "unknown", - "id": "Intel/neural-chat-7b-v3-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4988397452093778 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5032226831964403 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48952083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26670545212765956 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-3/0bec0f9a-863b-42f5-96eb-7263eb1c8a61.json b/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-3/0bec0f9a-863b-42f5-96eb-7263eb1c8a61.json deleted file mode 100644 index 55271e9a4bbc16a1644219f4fe888e2bbdd9174d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-3/0bec0f9a-863b-42f5-96eb-7263eb1c8a61.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3-3/1762652579.6477928", - "retrieved_timestamp": "1762652579.647794", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Intel/neural-chat-7b-v3-3", - "developer": "Intel", - "inference_platform": "unknown", - "id": "Intel/neural-chat-7b-v3-3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4762585495374495 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48766180524289693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4859583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2624667553191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3/617dbd41-3ca3-46d8-8fd2-491d6be39554.json b/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3/617dbd41-3ca3-46d8-8fd2-491d6be39554.json deleted file mode 100644 index 84cbcc05bc5a969af323fc811eda5eb57a0725e2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3/617dbd41-3ca3-46d8-8fd2-491d6be39554.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3/1762652579.646828", - "retrieved_timestamp": "1762652579.6468291", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Intel/neural-chat-7b-v3", - "developer": "Intel", - "inference_platform": "unknown", - "id": "Intel/neural-chat-7b-v3" - 
}, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27779735546128714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048316221363103 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5054895833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26986369680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Invalid-Null/Invalid-Null_PeiYangMe-0.5/c645a252-366a-4890-a16b-bf687bfbb593.json b/leaderboard_data/HFOpenLLMv2/Invalid-Null/Invalid-Null_PeiYangMe-0.5/c645a252-366a-4890-a16b-bf687bfbb593.json deleted file mode 100644 index 7bbd2573fa69babfb73405d1a5f408249d636044..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Invalid-Null/Invalid-Null_PeiYangMe-0.5/c645a252-366a-4890-a16b-bf687bfbb593.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Invalid-Null_PeiYangMe-0.5/1762652579.648252", - "retrieved_timestamp": "1762652579.648252", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Invalid-Null/PeiYangMe-0.5", - "developer": "Invalid-Null", - "inference_platform": "unknown", - "id": "Invalid-Null/PeiYangMe-0.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14088507382633633 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27907748194216614 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37381249999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11087101063829788 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Invalid-Null/Invalid-Null_PeiYangMe-0.7/294c1745-38cb-4b1e-aae6-e2878ab9065a.json b/leaderboard_data/HFOpenLLMv2/Invalid-Null/Invalid-Null_PeiYangMe-0.7/294c1745-38cb-4b1e-aae6-e2878ab9065a.json deleted file mode 100644 index 0faf507c26620ef798faa1e8d69c81c3b20d0bbc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Invalid-Null/Invalid-Null_PeiYangMe-0.7/294c1745-38cb-4b1e-aae6-e2878ab9065a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Invalid-Null_PeiYangMe-0.7/1762652579.648521", - "retrieved_timestamp": "1762652579.648522", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Invalid-Null/PeiYangMe-0.7", - "developer": "Invalid-Null", - "inference_platform": "unknown", - "id": "Invalid-Null/PeiYangMe-0.7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1491032682172192 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30275310145886614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2332214765100671 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38571874999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11012300531914894 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_JOSIEv4o-8b-stage1-v4/e8bdfeef-9795-4b00-adec-6ac41c6718f7.json b/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_JOSIEv4o-8b-stage1-v4/e8bdfeef-9795-4b00-adec-6ac41c6718f7.json deleted file mode 100644 index bffaabc96994fc1eea9c3c0e3951f19a9cfae5d1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_JOSIEv4o-8b-stage1-v4/e8bdfeef-9795-4b00-adec-6ac41c6718f7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Isaak-Carter_JOSIEv4o-8b-stage1-v4/1762652579.648735", - "retrieved_timestamp": "1762652579.648736", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", - "developer": "Isaak-Carter", - "inference_platform": "unknown", - "id": "Isaak-Carter/JOSIEv4o-8b-stage1-v4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2552660274737696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4724973116620121 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3654375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3316156914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff 
--git a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_JOSIEv4o-8b-stage1-v4/f28b57ba-103a-41bb-93b0-7b25fd155351.json b/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_JOSIEv4o-8b-stage1-v4/f28b57ba-103a-41bb-93b0-7b25fd155351.json deleted file mode 100644 index d854eea01b97d6f4c64d11d6a2d0530a0ff151ab..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_JOSIEv4o-8b-stage1-v4/f28b57ba-103a-41bb-93b0-7b25fd155351.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Isaak-Carter_JOSIEv4o-8b-stage1-v4/1762652579.6489909", - "retrieved_timestamp": "1762652579.648992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", - "developer": "Isaak-Carter", - "inference_platform": "unknown", - "id": "Isaak-Carter/JOSIEv4o-8b-stage1-v4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2476972211509905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4758066295235124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3641041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32920545212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/817eb9e1-bd7d-4033-b0ea-bc7df58dc087.json b/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/817eb9e1-bd7d-4033-b0ea-bc7df58dc087.json deleted file mode 100644 index 7b3a4d42769219d286d55fa620867a79b2723ad3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/817eb9e1-bd7d-4033-b0ea-bc7df58dc087.json +++ 
/dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/1762652579.649409", - "retrieved_timestamp": "1762652579.64941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", - "developer": "Isaak-Carter", - "inference_platform": "unknown", - "id": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7841039552830933 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5310923599182072 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47205438066465255 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43539583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4128158244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated/2013b3a9-3644-4f66-9941-b5d2ba6e7b81.json b/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated/2013b3a9-3644-4f66-9941-b5d2ba6e7b81.json deleted file mode 100644 index 9ba116a2c287b664ceb0e4a4923ffa7a7de8ec54..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated/2013b3a9-3644-4f66-9941-b5d2ba6e7b81.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated/1762652579.6491818", - "retrieved_timestamp": "1762652579.649183", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated", - "developer": "Isaak-Carter", - "inference_platform": "unknown", - "id": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7317473193349202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5396376284460921 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4086666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4276097074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/J-LAB/J-LAB_Thynk_orpo/3565fba3-e63d-49f8-9e8f-deef83531eb9.json b/leaderboard_data/HFOpenLLMv2/J-LAB/J-LAB_Thynk_orpo/3565fba3-e63d-49f8-9e8f-deef83531eb9.json deleted file mode 100644 index c67d593c23341ee9b9900ca386bd0baca4386a3e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/J-LAB/J-LAB_Thynk_orpo/3565fba3-e63d-49f8-9e8f-deef83531eb9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/J-LAB_Thynk_orpo/1762652579.649622", - "retrieved_timestamp": "1762652579.6496232", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "J-LAB/Thynk_orpo", - "developer": "J-LAB", - "inference_platform": "unknown", - "id": "J-LAB/Thynk_orpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21017788357114678 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44631138778709606 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45147916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32313829787234044 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Casual-Magnum-34B/0b9358f8-1e27-448f-9932-1f2c6feac036.json b/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Casual-Magnum-34B/0b9358f8-1e27-448f-9932-1f2c6feac036.json deleted file mode 100644 index aa365b17d53b4e8979c2759fc0f16f398479bc4d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Casual-Magnum-34B/0b9358f8-1e27-448f-9932-1f2c6feac036.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Jacoby746_Casual-Magnum-34B/1762652579.65033", - "retrieved_timestamp": "1762652579.6503308", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Jacoby746/Casual-Magnum-34B", - "developer": "Jacoby746", - "inference_platform": "unknown", - "id": "Jacoby746/Casual-Magnum-34B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19301675110927893 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6032046880542974 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724832214765101 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5183676861702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Inf-Silent-Kunoichi-v0.1-2x7B/d1fa6abf-be2b-4ea6-bcbe-066ac37aa54f.json b/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Inf-Silent-Kunoichi-v0.1-2x7B/d1fa6abf-be2b-4ea6-bcbe-066ac37aa54f.json deleted file mode 100644 index c0b5bf2af747f0478f07fea6730f920107fc6bb2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Inf-Silent-Kunoichi-v0.1-2x7B/d1fa6abf-be2b-4ea6-bcbe-066ac37aa54f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Jacoby746_Inf-Silent-Kunoichi-v0.1-2x7B/1762652579.6505952", - "retrieved_timestamp": "1762652579.6505961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B", - "developer": "Jacoby746", - "inference_platform": "unknown", - "id": "Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38798166642286913 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.518546209727402 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42804166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271276595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Inf-Silent-Kunoichi-v0.2-2x7B/f611991b-11c1-4232-bc63-8cf2942605ae.json b/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Inf-Silent-Kunoichi-v0.2-2x7B/f611991b-11c1-4232-bc63-8cf2942605ae.json deleted file mode 100644 index 3989ce32015effcf7860d920a6e93cc897661658..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Inf-Silent-Kunoichi-v0.2-2x7B/f611991b-11c1-4232-bc63-8cf2942605ae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Jacoby746_Inf-Silent-Kunoichi-v0.2-2x7B/1762652579.650832", - "retrieved_timestamp": "1762652579.650833", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B", - "developer": "Jacoby746", - "inference_platform": "unknown", - "id": "Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3636019095998617 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5209417299963208 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43197916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32721077127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Athena-4x7B/27d9d5c2-39d8-45e5-9614-a343144f05d8.json 
b/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Athena-4x7B/27d9d5c2-39d8-45e5-9614-a343144f05d8.json deleted file mode 100644 index af4a11e7b8faef1a6e9ed7179a9e3df9c700ebdc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Athena-4x7B/27d9d5c2-39d8-45e5-9614-a343144f05d8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Athena-4x7B/1762652579.651071", - "retrieved_timestamp": "1762652579.651072", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Jacoby746/Proto-Athena-4x7B", - "developer": "Jacoby746", - "inference_platform": "unknown", - "id": "Jacoby746/Proto-Athena-4x7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37029636918930664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5106547638742905 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43477083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32064494680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Athena-v0.2-4x7B/060feab1-4ce6-44a9-8ae2-c06468dd4dc9.json b/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Athena-v0.2-4x7B/060feab1-4ce6-44a9-8ae2-c06468dd4dc9.json deleted file mode 100644 index b9d97bcdbb02fecba1c0c60ba081908b0106f121..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Athena-v0.2-4x7B/060feab1-4ce6-44a9-8ae2-c06468dd4dc9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Athena-v0.2-4x7B/1762652579.651291", - "retrieved_timestamp": "1762652579.6512918", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Jacoby746/Proto-Athena-v0.2-4x7B", - "developer": "Jacoby746", - "inference_platform": "unknown", - "id": "Jacoby746/Proto-Athena-v0.2-4x7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37524213531208306 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5067731005424964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42128125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3197307180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Harpy-Blazing-Light-v0.1-2x7B/f7455f30-e04e-4bc6-9d71-e33272d4577c.json b/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Harpy-Blazing-Light-v0.1-2x7B/f7455f30-e04e-4bc6-9d71-e33272d4577c.json deleted file mode 100644 index dac4e9f499b6bc4c46c07642d373c96b62d29dea..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Harpy-Blazing-Light-v0.1-2x7B/f7455f30-e04e-4bc6-9d71-e33272d4577c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Harpy-Blazing-Light-v0.1-2x7B/1762652579.651509", - "retrieved_timestamp": "1762652579.65151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B", - "developer": "Jacoby746", - "inference_platform": "unknown", - "id": "Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4904719477652628 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5186849053052595 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44496874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33011968085106386 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Harpy-Spark-v0.1-7B/420cf07c-f043-49db-a62d-91e0c21aff2f.json b/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Harpy-Spark-v0.1-7B/420cf07c-f043-49db-a62d-91e0c21aff2f.json deleted file mode 100644 index 0d64a109d31e646433b0af2a899c49ab26fc5d3d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Harpy-Spark-v0.1-7B/420cf07c-f043-49db-a62d-91e0c21aff2f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Harpy-Spark-v0.1-7B/1762652579.651721", - "retrieved_timestamp": "1762652579.651722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Jacoby746/Proto-Harpy-Spark-v0.1-7B", - "developer": "Jacoby746", - "inference_platform": "unknown", - "id": "Jacoby746/Proto-Harpy-Spark-v0.1-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43326928106313467 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4735771808296548 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43166666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30693151595744683 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen-0.5B-DPO-1epoch/7da8cc7e-791f-420d-9004-b29ddf54e381.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen-0.5B-DPO-1epoch/7da8cc7e-791f-420d-9004-b29ddf54e381.json deleted file mode 100644 index ea513cdb8422a609d6f06055482171af1f1cbc9f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen-0.5B-DPO-1epoch/7da8cc7e-791f-420d-9004-b29ddf54e381.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-DPO-1epoch/1762652579.651926", - "retrieved_timestamp": "1762652579.651926", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen-0.5B-DPO-1epoch", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen-0.5B-DPO-1epoch" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26473313031644924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31907502434278595 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33517708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15575132978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen-0.5B-DPO-5epoch/42960491-549f-42bb-9669-5231ca0c436b.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen-0.5B-DPO-5epoch/42960491-549f-42bb-9669-5231ca0c436b.json deleted file mode 100644 index 5ed4c54e5c6b5544a087a650856d0d87526d3948..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen-0.5B-DPO-5epoch/42960491-549f-42bb-9669-5231ca0c436b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-DPO-5epoch/1762652579.65218", - "retrieved_timestamp": "1762652579.652181", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen-0.5B-DPO-5epoch", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen-0.5B-DPO-5epoch" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25701472094043804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3112109544868782 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15325797872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of 
file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/46c6ab7f-33a0-4e72-9a63-b24da3f9c4d6.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/46c6ab7f-33a0-4e72-9a63-b24da3f9c4d6.json deleted file mode 100644 index c549549686e65e871b7b6960f638dfed7ba11f69..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/46c6ab7f-33a0-4e72-9a63-b24da3f9c4d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/1762652579.653574", - "retrieved_timestamp": "1762652579.653575", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24687274210206694 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3260313037664168 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06495468277945618 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34336458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1574966755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1ff4251b-d01a-4ced-8868-776210e1ecb6.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1ff4251b-d01a-4ced-8868-776210e1ecb6.json deleted file mode 100644 index 780b927eeff17fc43315399cc80c737949bd8426..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1ff4251b-d01a-4ced-8868-776210e1ecb6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1762652579.6538298", - "retrieved_timestamp": "1762652579.6538298", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2605863553150086 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3308028437367363 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16256648936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/c3c5cb61-3c4f-4796-9d3c-493618db0f91.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/c3c5cb61-3c4f-4796-9d3c-493618db0f91.json deleted file mode 100644 index 31939aed089275eb7157b253e38d5879ead6590e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/c3c5cb61-3c4f-4796-9d3c-493618db0f91.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/1762652579.654063", - "retrieved_timestamp": "1762652579.6540642", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2529178136234081 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3261949089625076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.330125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15757978723404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT/48e6f9aa-5034-4653-8832-b0a16bf01079.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT/48e6f9aa-5034-4653-8832-b0a16bf01079.json deleted file mode 100644 index b4c1ff825f7fe504791a994b38a90b60c14a1bb4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT/48e6f9aa-5034-4653-8832-b0a16bf01079.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT/1762652579.65331", - "retrieved_timestamp": "1762652579.653311", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27677340567472086 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3253697801563151 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15201130319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-2ep/00efca13-0d04-4700-a90f-bd621a971555.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-2ep/00efca13-0d04-4700-a90f-bd621a971555.json deleted file mode 100644 index bf9ed72a2863a29a99b87375d8c6c43f64ff92c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-2ep/00efca13-0d04-4700-a90f-bd621a971555.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-2ep/1762652579.654743", - "retrieved_timestamp": "1762652579.6547441", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2140498322229462 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3172227797719337 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34727083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15367353723404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-3ep/f357f4eb-1837-4ab2-ad4b-9cc8a9054517.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-3ep/f357f4eb-1837-4ab2-ad4b-9cc8a9054517.json deleted file mode 100644 index 1966e91ff858ea2ec63fdae7d498522fe3fe5906..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-3ep/f357f4eb-1837-4ab2-ad4b-9cc8a9054517.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-3ep/1762652579.6549618", - "retrieved_timestamp": "1762652579.654963", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22573992561957826 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3064261556890236 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.36606249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15317486702127658 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-5ep/206c756e-1edc-491f-9f86-7e00c7ab7085.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-5ep/206c756e-1edc-491f-9f86-7e00c7ab7085.json deleted file mode 100644 index 81e0cdb9275f3090704c6c31599d0d2fb9e04cb0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-5ep/206c756e-1edc-491f-9f86-7e00c7ab7085.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-5ep/1762652579.655172", - "retrieved_timestamp": "1762652579.655173", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19868726091215752 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31044747322019184 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3406666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15575132978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4/7d591ed9-5802-43a3-bb38-ec45b69adb08.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4/7d591ed9-5802-43a3-bb38-ec45b69adb08.json deleted file mode 100644 index d90084860acfa23d7d0a2c34faef2c6b569dec74..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4/7d591ed9-5802-43a3-bb38-ec45b69adb08.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4/1762652579.654527", - "retrieved_timestamp": "1762652579.654527", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2019596891802639 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3017092819749249 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3446354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16190159574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-2ep/fde79985-6832-4315-8650-fdcf9ad68087.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-2ep/fde79985-6832-4315-8650-fdcf9ad68087.json deleted file mode 100644 index df967183fadbfd6b8fc08530e58a0253dbd420d8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-2ep/fde79985-6832-4315-8650-fdcf9ad68087.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-2ep/1762652579.655605", - "retrieved_timestamp": "1762652579.655606", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19706379074189817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3224699194774388 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3367604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1651429521276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-3ep/aef8fd41-ac51-4fb5-b8ae-78ebca9b4215.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-3ep/aef8fd41-ac51-4fb5-b8ae-78ebca9b4215.json deleted file mode 100644 index 5ccf89b42c945752a3c3790999c367670f0bea6b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-3ep/aef8fd41-ac51-4fb5-b8ae-78ebca9b4215.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-3ep/1762652579.655815", - "retrieved_timestamp": "1762652579.6558158", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep", - "developer": "JayHyeon", - 
"inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2241164554493189 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32468117082421427 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3353333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16888297872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-5ep/b5cdb9c2-d81a-4e0b-817a-3e101d122e7a.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-5ep/b5cdb9c2-d81a-4e0b-817a-3e101d122e7a.json deleted file mode 100644 index 33f1c3627536969ce3a8d3c373f73948fcafa8a6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-5ep/b5cdb9c2-d81a-4e0b-817a-3e101d122e7a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-5ep/1762652579.656047", - "retrieved_timestamp": "1762652579.656048", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22918744486850445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3259343389530942 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3235208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16879986702127658 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5/3eac4497-66af-4fc6-bf89-459631e4a418.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5/3eac4497-66af-4fc6-bf89-459631e4a418.json deleted file mode 100644 index ee3093249cc664e698b7c4c514e9a95c6713d317..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5/3eac4497-66af-4fc6-bf89-459631e4a418.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5/1762652579.6553931", - "retrieved_timestamp": "1762652579.655394", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1985875255433361 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3139860294769257 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { 
- "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1697972074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-2ep/9d58433f-a74c-4345-bd47-a8f2c4e2361e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-2ep/9d58433f-a74c-4345-bd47-a8f2c4e2361e.json deleted file mode 100644 index adac30fbeeb0196fc19008768d63fbc99c970fa5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-2ep/9d58433f-a74c-4345-bd47-a8f2c4e2361e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-2ep/1762652579.656457", - "retrieved_timestamp": "1762652579.656457", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18307535117931534 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29839616748934167 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1484375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-3ep/e8109e5c-6276-4935-bfa0-fc969f118d3b.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-3ep/e8109e5c-6276-4935-bfa0-fc969f118d3b.json deleted file mode 100644 index 66821f8bf4551461b9c846b3318359f0d681ad43..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-3ep/e8109e5c-6276-4935-bfa0-fc969f118d3b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-3ep/1762652579.656671", - "retrieved_timestamp": "1762652579.656672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1989620872617987 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3109875129533253 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3449479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14162234042553193 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-5ep/9d6b36c5-c0ec-4ab1-a12b-47efc34ebfc8.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-5ep/9d6b36c5-c0ec-4ab1-a12b-47efc34ebfc8.json deleted file mode 100644 index 872b240379c2eac54f8cb2c405dcb6819ec58b54..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-5ep/9d6b36c5-c0ec-4ab1-a12b-47efc34ebfc8.json 
+++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-5ep/1762652579.656877", - "retrieved_timestamp": "1762652579.656878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18971994308434953 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936418449815176 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38739583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13364361702127658 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4/5e307ea5-70da-476a-8d9e-1d488385565f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4/5e307ea5-70da-476a-8d9e-1d488385565f.json deleted file mode 100644 index 6e55d96ceda351608949e7ed4dbc1dd4dfeddfdf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4/5e307ea5-70da-476a-8d9e-1d488385565f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4/1762652579.656255", - "retrieved_timestamp": "1762652579.656256", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"JayHyeon/Qwen2.5-0.5B-SFT-2e-4", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2034335562972912 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2935549587263229 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3434270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14128989361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/343b7db1-8f96-4998-a6fb-5eb0aa1b6b21.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/343b7db1-8f96-4998-a6fb-5eb0aa1b6b21.json deleted file mode 100644 index b011fb41ead7a2d5026abe0aa129cda90d0fe4ad..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/343b7db1-8f96-4998-a6fb-5eb0aa1b6b21.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/1762652579.6580968", - "retrieved_timestamp": "1762652579.658098", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24105262924595627 - } - }, 
- { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31671815484837784 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.330125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/bfa11262-d7bd-44b3-8b8b-81013f1e0c24.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/bfa11262-d7bd-44b3-8b8b-81013f1e0c24.json deleted file mode 100644 index 10ec819145529016434be6807df27764fc56572e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/bfa11262-d7bd-44b3-8b8b-81013f1e0c24.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/1762652579.658342", - "retrieved_timestamp": "1762652579.6583428", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23685598656010498 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3260038632940968 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3355208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15699800531914893 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/902849f8-dc58-4e01-ba30-ff95412272d3.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/902849f8-dc58-4e01-ba30-ff95412272d3.json deleted file mode 100644 index 7a6c3be960dc44660a308b3a8dceb42b12429559..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/902849f8-dc58-4e01-ba30-ff95412272d3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/1762652579.6585789", - "retrieved_timestamp": "1762652579.65858", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22623971063444992 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3261540051256346 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy 
on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3408229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15408909574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/4c5cace1-70ce-48f3-aad1-d141924c24de.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/4c5cace1-70ce-48f3-aad1-d141924c24de.json deleted file mode 100644 index 10973e16c32c896edc716d894de1f4f634ee00e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/4c5cace1-70ce-48f3-aad1-d141924c24de.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/1762652579.6588218", - "retrieved_timestamp": "1762652579.658823", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25079455843827714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3199331515135054 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33545833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15550199468085107 - } - } - 
], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/e42051f2-90f2-4fbe-a4bd-623482abf10f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/e42051f2-90f2-4fbe-a4bd-623482abf10f.json deleted file mode 100644 index 63a1ad5574077283d87be6a18fd11573c3fc8d9d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/e42051f2-90f2-4fbe-a4bd-623482abf10f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/1762652579.6591082", - "retrieved_timestamp": "1762652579.659109", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.238979241745236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31816042712158116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15600066489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/e70423b6-5a7d-4745-b5a3-968f363a3b7a.json 
b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/e70423b6-5a7d-4745-b5a3-968f363a3b7a.json deleted file mode 100644 index eea17ecf3360d1277d2601549c91e168b7ff5520..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/e70423b6-5a7d-4745-b5a3-968f363a3b7a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/1762652579.6593359", - "retrieved_timestamp": "1762652579.659337", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2423015376977531 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3154080373582542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15475398936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/2a7b8fa7-5c16-414b-968e-ec7b06e8143c.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/2a7b8fa7-5c16-414b-968e-ec7b06e8143c.json deleted file mode 100644 index 2cfbbd2f508052afcd63658bd0a4cca50df5f18b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/2a7b8fa7-5c16-414b-968e-ec7b06e8143c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/1762652579.6595562", - "retrieved_timestamp": "1762652579.659557", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24932069132124984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3189717077702392 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15608377659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/dfa1b391-4b18-4ac0-a397-a983070647a7.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/dfa1b391-4b18-4ac0-a397-a983070647a7.json deleted file mode 100644 index c6c104c7161f07f49f4368d1f4d4b77ec5ab8ae9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/dfa1b391-4b18-4ac0-a397-a983070647a7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/1762652579.660001", - "retrieved_timestamp": "1762652579.660005", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2541667220752049 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31671883869615397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32885416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15799534574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/96d31674-0011-4621-9131-31b5f6ede223.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/96d31674-0011-4621-9131-31b5f6ede223.json deleted file mode 100644 index 1e10943dc6d4d5211fda04c99de81bafb88ef97f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/96d31674-0011-4621-9131-31b5f6ede223.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/1762652579.660342", - "retrieved_timestamp": "1762652579.660342", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24507418095098782 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3159533058861391 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3301875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15608377659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/d8663966-a5f5-40e6-a327-1255f7c3395f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/d8663966-a5f5-40e6-a327-1255f7c3395f.json deleted file mode 100644 index 665c31d06a2ff1b731ad68cca1507c89b5261103..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/d8663966-a5f5-40e6-a327-1255f7c3395f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/1762652579.6605709", - "retrieved_timestamp": "1762652579.6605718", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25574032456105356 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31419826948787827 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3315208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1574966755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/a1fadf30-c543-4b73-bf28-0cb9cb2fc91f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/a1fadf30-c543-4b73-bf28-0cb9cb2fc91f.json deleted file mode 100644 index 934cd9bb3a4d34ea62953851aeecaeeb7dd18030..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/a1fadf30-c543-4b73-bf28-0cb9cb2fc91f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/1762652579.660821", - "retrieved_timestamp": "1762652579.660822", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26053648763059795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3166968072745491 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15766289893617022 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/57b69bd0-73f6-42e0-bd9e-984bb1e6a553.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/57b69bd0-73f6-42e0-bd9e-984bb1e6a553.json deleted file mode 100644 index 75b1ca096c6d75c5b6addc165c5bd85e75ca6d4d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/57b69bd0-73f6-42e0-bd9e-984bb1e6a553.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/1762652579.661046", - "retrieved_timestamp": "1762652579.661047", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25781371206177384 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31732037273750646 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32879166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1583277925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/93597efa-6da8-4074-8049-6ec66f499cbf.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/93597efa-6da8-4074-8049-6ec66f499cbf.json deleted file mode 100644 index 08b3069adbfee31c0448fe9ef40fd62e85911baf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/93597efa-6da8-4074-8049-6ec66f499cbf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/1762652579.661258", - "retrieved_timestamp": "1762652579.661259", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23353369060758786 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3197619098572027 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348994 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32755208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1580784574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/00a5dc4a-6ffb-4e6a-9547-416ff29e0ded.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/00a5dc4a-6ffb-4e6a-9547-416ff29e0ded.json deleted file mode 100644 index 2d14bdae9cceb4ff005c9f2ce2528e6958cb80ea..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/00a5dc4a-6ffb-4e6a-9547-416ff29e0ded.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/1762652579.6614761", - "retrieved_timestamp": "1762652579.6614761", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24719743613611883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32262707839652854 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32621875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15375664893617022 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/13cf92c4-fbeb-445a-85d6-bf71ce2e68c9.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/13cf92c4-fbeb-445a-85d6-bf71ce2e68c9.json deleted file mode 100644 index f3e8872679f5575bdbfb948c88541e48f3113ec9..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/13cf92c4-fbeb-445a-85d6-bf71ce2e68c9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/1762652579.661691", - "retrieved_timestamp": "1762652579.661692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2474223948013493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32291208173140107 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32748958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15392287234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/14a173b6-4d56-4d22-a888-57ea46d72e67.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/14a173b6-4d56-4d22-a888-57ea46d72e67.json deleted file mode 100644 index 70d5aec592e87582c90c32bd5cc6de8892e9a674..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/14a173b6-4d56-4d22-a888-57ea46d72e67.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/1762652579.6619039", - "retrieved_timestamp": "1762652579.6619048", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24027801788144343 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32453683161596314 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32621875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1573304521276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/f46cc7cb-27e8-4723-9ecf-cbeef9789b25.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/f46cc7cb-27e8-4723-9ecf-cbeef9789b25.json deleted file mode 100644 index 661a5020640221096403b8f805b96818a11d3b6a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/f46cc7cb-27e8-4723-9ecf-cbeef9789b25.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/1762652579.662116", - "retrieved_timestamp": "1762652579.662117", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam", - "developer": "JayHyeon", - 
"inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23680611887569425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3224293761524927 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33548958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15159574468085107 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/de200bef-71a2-4efb-bc34-02f69385b636.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/de200bef-71a2-4efb-bc34-02f69385b636.json deleted file mode 100644 index e2c666a3c35324671f874cc15458065636aa59ac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/de200bef-71a2-4efb-bc34-02f69385b636.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/1762652579.662327", - "retrieved_timestamp": "1762652579.662327", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23718068059415687 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32477052921998556 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3394270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1550033244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/7ed1ff6a-fe4d-4f78-bbc6-c5e64a7fbfc1.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/7ed1ff6a-fe4d-4f78-bbc6-c5e64a7fbfc1.json deleted file mode 100644 index 6dbffccf7f6cd5176b97a52261dbf23958b5a086..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/7ed1ff6a-fe4d-4f78-bbc6-c5e64a7fbfc1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/1762652579.6625469", - "retrieved_timestamp": "1762652579.662548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24992021170494289 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31806007750183346 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15741356382978725 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/82d38084-32b1-4224-810c-b66dd337b3fe.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/82d38084-32b1-4224-810c-b66dd337b3fe.json deleted file mode 100644 index 44d7b9796c9ebbf8cd58a36518a5716dad2f555c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/82d38084-32b1-4224-810c-b66dd337b3fe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/1762652579.662755", - "retrieved_timestamp": "1762652579.662755", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23810489501190177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32421844512358233 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3328229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15724734042553193 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/972e0d76-63bb-431b-9d9b-68dd6b738447.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/972e0d76-63bb-431b-9d9b-68dd6b738447.json deleted file mode 100644 index ed1719615b785fd08bd70b88a0655c992923268b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/972e0d76-63bb-431b-9d9b-68dd6b738447.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/1762652579.662969", - "retrieved_timestamp": "1762652579.662969", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2420765790325226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3224798177796032 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3408229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14960106382978725 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/7337bc31-54b6-43b9-bb26-63f2273ffc7e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/7337bc31-54b6-43b9-bb26-63f2273ffc7e.json deleted file mode 100644 index 7304968d3dba551508faa3f2eb70cc7502ab368c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/7337bc31-54b6-43b9-bb26-63f2273ffc7e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/1762652579.663178", - "retrieved_timestamp": "1762652579.663179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23805502732749106 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32652003776870003 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34079166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14985039893617022 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/c2e14e90-6c18-4a9f-9d68-a9d98960dd32.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/c2e14e90-6c18-4a9f-9d68-a9d98960dd32.json 
deleted file mode 100644 index 83441688d5e47fa989aad53e485e1dc89c4d9252..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/c2e14e90-6c18-4a9f-9d68-a9d98960dd32.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/1762652579.663386", - "retrieved_timestamp": "1762652579.663386", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25264298727376694 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3176911636441555 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15724734042553193 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/972d45c5-acd1-4e54-8310-9ff56c5fb061.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/972d45c5-acd1-4e54-8310-9ff56c5fb061.json deleted file mode 100644 index 91f3781a85b22153b7b0e98882eb4a86c543c935..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/972d45c5-acd1-4e54-8310-9ff56c5fb061.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/1762652579.6636329", - 
"retrieved_timestamp": "1762652579.6636338", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24572356901909154 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.316045450978746 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33015625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15716422872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/2faf738f-64f4-4e14-8011-9e00a4e2dd6a.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/2faf738f-64f4-4e14-8011-9e00a4e2dd6a.json deleted file mode 100644 index df7b41e16a5f46abd4b64db02adbdaa08c890d32..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/2faf738f-64f4-4e14-8011-9e00a4e2dd6a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/1762652579.663875", - "retrieved_timestamp": "1762652579.663876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2441998342176536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3193544697854515 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33148958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1566655585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/15b28d99-e02a-4021-899b-adef87dfe96a.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/15b28d99-e02a-4021-899b-adef87dfe96a.json deleted file mode 100644 index 68a2f42856f1a84ebff713f8a88e4a30cd175d08..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/15b28d99-e02a-4021-899b-adef87dfe96a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/1762652579.6641018", - "retrieved_timestamp": "1762652579.664103", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.26036139664977814 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31784656431310543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15674867021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/b643171e-adaa-4f6e-8860-542950810578.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/b643171e-adaa-4f6e-8860-542950810578.json deleted file mode 100644 index 36b43cb3e37bc6cc42e8e9bed3856503cb8d1b99..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/b643171e-adaa-4f6e-8860-542950810578.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/1762652579.664332", - "retrieved_timestamp": "1762652579.664333", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24899599728719796 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3172899997448431 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3301875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15691489361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/a26204c0-90c5-44fd-8814-d69c6e4f4585.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/a26204c0-90c5-44fd-8814-d69c6e4f4585.json deleted file mode 100644 index 8d12dd0b0389ac52ac15ba72309dff0d9f23c059..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/a26204c0-90c5-44fd-8814-d69c6e4f4585.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/1762652579.6645608", - "retrieved_timestamp": "1762652579.664562", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26036139664977814 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3149566664115098 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15658244680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/bc45fc30-c472-471a-b0c8-f68b9397d844.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/bc45fc30-c472-471a-b0c8-f68b9397d844.json deleted file mode 100644 index 71ca238e09c3d791932fd4dbceb714974ddac9cb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/bc45fc30-c472-471a-b0c8-f68b9397d844.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/1762652579.664829", - "retrieved_timestamp": "1762652579.66483", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550410688085391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3211026993947845 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32876041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.15708111702127658 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/dff1ec0f-99a6-493d-9f2c-a6a523455b7e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/dff1ec0f-99a6-493d-9f2c-a6a523455b7e.json deleted file mode 100644 index a44d8dc1f10f5a386e0868fc7291b69290a3422c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/dff1ec0f-99a6-493d-9f2c-a6a523455b7e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/1762652579.665046", - "retrieved_timestamp": "1762652579.665047", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24779695651981187 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3197773660515741 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33145833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15866023936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/a6385d82-407e-44b2-9148-9cbf8f353557.json 
b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/a6385d82-407e-44b2-9148-9cbf8f353557.json deleted file mode 100644 index c31a795660f47dc096f56e7087d7d27ad97c3a46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/a6385d82-407e-44b2-9148-9cbf8f353557.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/1762652579.6652648", - "retrieved_timestamp": "1762652579.665266", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24747226248576 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32246983072126806 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.330125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15558510638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/17fb5411-3dc6-44b7-971b-8a080ed93de0.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/17fb5411-3dc6-44b7-971b-8a080ed93de0.json deleted file mode 100644 index 80e4a73e84d9931904cacd47e7f13ffad44e3195..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/17fb5411-3dc6-44b7-971b-8a080ed93de0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/1762652579.665471", - "retrieved_timestamp": "1762652579.665472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2590127528291599 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3185132309797721 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15857712765957446 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/670b89a5-2a83-480e-a33b-6903609a10dc.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/670b89a5-2a83-480e-a33b-6903609a10dc.json deleted file mode 100644 index def76622b23ec0279f5318dd7a2780bc588e3cfd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/670b89a5-2a83-480e-a33b-6903609a10dc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/1762652579.665683", - "retrieved_timestamp": "1762652579.665684", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23233464984020177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179474145066817 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15475398936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/e660922f-847b-4993-91a4-b96809ff1e85.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/e660922f-847b-4993-91a4-b96809ff1e85.json deleted file mode 100644 index 6c72674f847d3bc10ef647bd2f9e5ffa58b9104d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/e660922f-847b-4993-91a4-b96809ff1e85.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/1762652579.665889", - "retrieved_timestamp": "1762652579.66589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23151017079127825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3259705145690442 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3383125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15209441489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/41d18fa1-d19e-47cf-8fec-b04725ff097f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/41d18fa1-d19e-47cf-8fec-b04725ff097f.json deleted file mode 100644 index 99799658c8418e4647d70bbbd6aa5826f63fff4b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/41d18fa1-d19e-47cf-8fec-b04725ff097f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/1762652579.666097", - "retrieved_timestamp": "1762652579.6660979", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2298368329366082 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33204616486918276 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33288541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15674867021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/122a997d-f452-4511-96f3-f31ecb5d8d7b.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/122a997d-f452-4511-96f3-f31ecb5d8d7b.json deleted file mode 100644 index 293f5705039bf91f26c64625c34c8db71922f786..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/122a997d-f452-4511-96f3-f31ecb5d8d7b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/1762652579.666312", - "retrieved_timestamp": "1762652579.666313", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24687274210206694 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3178544697854515 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33015625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1574966755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/c0d7514b-6809-49d7-9193-38e9c9ad03be.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/c0d7514b-6809-49d7-9193-38e9c9ad03be.json deleted file mode 100644 index 56db8cb812f80294cbf00f2d533da7357859e459..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/c0d7514b-6809-49d7-9193-38e9c9ad03be.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/1762652579.666527", - "retrieved_timestamp": "1762652579.666527", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2520434668900739 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3167822100533442 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3328229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15757978723404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/923f6446-f9fb-47ae-b585-ac131d75c107.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/923f6446-f9fb-47ae-b585-ac131d75c107.json deleted file mode 100644 index 78987f399f082c4a0579d4ab7477484d858ad4cd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/923f6446-f9fb-47ae-b585-ac131d75c107.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/1762652579.6667368", - "retrieved_timestamp": "1762652579.666738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2665815591519391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3190675981811982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32885416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1566655585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/da330322-f144-44bb-833a-7b92c11f3888.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/da330322-f144-44bb-833a-7b92c11f3888.json deleted file mode 100644 index 4de917bf9d2b31894a45848a4c036cc05b9b4f34..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/da330322-f144-44bb-833a-7b92c11f3888.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/1762652579.667231", - "retrieved_timestamp": "1762652579.667236", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24992021170494289 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31779941873624934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/10014f98-cae2-435b-b6e7-17064bb079a5.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/10014f98-cae2-435b-b6e7-17064bb079a5.json deleted file mode 100644 index 3d3080b24ddc0e0062e797811ce73130b658c5ae..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/10014f98-cae2-435b-b6e7-17064bb079a5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/1762652579.6676302", - "retrieved_timestamp": "1762652579.6676311", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24170201731406002 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3178391594145879 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1574966755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/c6d4f510-abc8-4524-99b0-e6d98c6e9aa9.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/c6d4f510-abc8-4524-99b0-e6d98c6e9aa9.json deleted file mode 100644 index a4f4ad5fdf93c1efa40455c3ee045f73aa9f1346..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/c6d4f510-abc8-4524-99b0-e6d98c6e9aa9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/1762652579.66787", - "retrieved_timestamp": "1762652579.667871", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2562401095759252 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31904280434381205 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15757978723404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/b4d7f827-d1cb-46c6-9eea-248867fdc07f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/b4d7f827-d1cb-46c6-9eea-248867fdc07f.json deleted file mode 100644 index 67331d71606b257dbd2edd0e814d1c8144b02c6a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/b4d7f827-d1cb-46c6-9eea-248867fdc07f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/1762652579.6680949", - "retrieved_timestamp": "1762652579.6680949", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam", - "developer": 
"JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2408276705807258 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31647277641099675 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3315208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1556682180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/d1d2f75d-ddd8-42cb-9de8-1f327479eb9b.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/d1d2f75d-ddd8-42cb-9de8-1f327479eb9b.json deleted file mode 100644 index 618840afd3b2bc6245cb53fc11464b95595d0076..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/d1d2f75d-ddd8-42cb-9de8-1f327479eb9b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/1762652579.668304", - "retrieved_timestamp": "1762652579.668305", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24812165055386376 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3204166266783764 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3301875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15915890957446807 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9df1e491-fa9d-41c7-ae46-8cc70a47a60f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9df1e491-fa9d-41c7-ae46-8cc70a47a60f.json deleted file mode 100644 index 8c0f6f95c12285b757c8fcb753925f7937facbd0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9df1e491-fa9d-41c7-ae46-8cc70a47a60f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/1762652579.668525", - "retrieved_timestamp": "1762652579.6685262", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2544914161092568 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3185709286639082 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, 
- "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32885416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15608377659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/6c070a2b-9f5e-46cd-b8ba-b6220509b85d.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/6c070a2b-9f5e-46cd-b8ba-b6220509b85d.json deleted file mode 100644 index 275cfeeedcf04168bd2f61e559dd10604042bfa1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/6c070a2b-9f5e-46cd-b8ba-b6220509b85d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/1762652579.668755", - "retrieved_timestamp": "1762652579.668756", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2519935992056632 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.320368681472897 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15375664893617022 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/4496da44-d4bd-40a8-8f91-56b2cb2fa766.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/4496da44-d4bd-40a8-8f91-56b2cb2fa766.json deleted file mode 100644 index 5fb7abe0c8c5ab917376b7b32c5fb7801a15164a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/4496da44-d4bd-40a8-8f91-56b2cb2fa766.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/1762652579.6689868", - "retrieved_timestamp": "1762652579.668988", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23146030310686755 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32128474090743103 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32221875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15824468085106383 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/69c6593c-6e84-498f-8d68-62c1809a4606.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/69c6593c-6e84-498f-8d68-62c1809a4606.json deleted file mode 100644 index eca99f8af4a481673e2529bb618b07090eb3b439..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/69c6593c-6e84-498f-8d68-62c1809a4606.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/1762652579.669204", - "retrieved_timestamp": "1762652579.669204", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25149381419079153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31867127828365593 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32888541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15392287234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/b1c0f775-987a-4da5-9451-09bf295b16ba.json 
b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/b1c0f775-987a-4da5-9451-09bf295b16ba.json deleted file mode 100644 index 4c7ae83da408a3ff529ee136ebc36f4bd521310a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/b1c0f775-987a-4da5-9451-09bf295b16ba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/1762652579.669419", - "retrieved_timestamp": "1762652579.66942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24719743613611883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213274785812292 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3261875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15882646276595744 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/c589d3d6-9d8b-45e3-a6c6-60f25d44349b.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/c589d3d6-9d8b-45e3-a6c6-60f25d44349b.json deleted file mode 100644 index ec9e8d468a1210c9ecbd772358e47df84536edfa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/c589d3d6-9d8b-45e3-a6c6-60f25d44349b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/1762652579.6696231", - "retrieved_timestamp": "1762652579.669624", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24599839536873275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32337658694524307 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15334109042553193 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1e76e5ee-1728-4756-8f13-d68ce1ca3a5e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1e76e5ee-1728-4756-8f13-d68ce1ca3a5e.json deleted file mode 100644 index 74d17a63602389cc0215385d64ca3e83ab28e7b8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1e76e5ee-1728-4756-8f13-d68ce1ca3a5e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1762652579.669835", - "retrieved_timestamp": "1762652579.669836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25236816092412573 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3255638228201855 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33679166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15309175531914893 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/a44985f9-2255-421b-93b9-fcb5761e17b8.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/a44985f9-2255-421b-93b9-fcb5761e17b8.json deleted file mode 100644 index c17027af336e2b6fd26240e475d6f6359f64d1dc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/a44985f9-2255-421b-93b9-fcb5761e17b8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/1762652579.670048", - "retrieved_timestamp": "1762652579.670049", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2264646692996804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3252098558034601 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1568317819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/ad59cc80-784d-41bf-9a3e-9d9f286667d2.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/ad59cc80-784d-41bf-9a3e-9d9f286667d2.json deleted file mode 100644 index 209d0c973a6eba92f613123243cf069bb71cf2e9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/ad59cc80-784d-41bf-9a3e-9d9f286667d2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/1762652579.6702561", - "retrieved_timestamp": "1762652579.6702569", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23016152697066006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3224479825736107 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34079166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15001662234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/0b72d3c8-aaff-4eca-854d-07d132e9aa25.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/0b72d3c8-aaff-4eca-854d-07d132e9aa25.json deleted file mode 100644 index 1a86e6808b9d0f10cf5b71d14bf6e537f5b13333..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/0b72d3c8-aaff-4eca-854d-07d132e9aa25.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/1762652579.670511", - "retrieved_timestamp": "1762652579.6705122", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25236816092412573 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3278027492189594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33945833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15209441489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/021eca20-1a26-4eba-9006-fb005e91696d.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/021eca20-1a26-4eba-9006-fb005e91696d.json deleted file mode 100644 index 4542ead2cd172bf18055a8a660c880e71826b325..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/021eca20-1a26-4eba-9006-fb005e91696d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/1762652579.67072", - "retrieved_timestamp": "1762652579.67072", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2657570801030156 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31752113645211816 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3301875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1574966755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/8662faaa-8964-468a-991b-43b2f0449d48.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/8662faaa-8964-468a-991b-43b2f0449d48.json deleted file mode 100644 index b253d3bcad16d6d5d4395c6b3ab5af65babecab1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/8662faaa-8964-468a-991b-43b2f0449d48.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/1762652579.6709208", - "retrieved_timestamp": "1762652579.6709208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2487211709375568 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3189091360416723 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15949135638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/56cad8c7-566f-46e5-9692-3c11f4408921.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/56cad8c7-566f-46e5-9692-3c11f4408921.json deleted file mode 100644 index 7e0155335a1af80af42d0db1923455da8de211ec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/56cad8c7-566f-46e5-9692-3c11f4408921.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/1762652579.671123", - "retrieved_timestamp": "1762652579.671123", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2560151509106947 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3158776856286612 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15616688829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/f86fb81b-29b8-425f-8129-ea054108a214.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/f86fb81b-29b8-425f-8129-ea054108a214.json deleted file mode 100644 index 8bd27e8eb43f5a733d92c03d1741956bc727c1f9..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/f86fb81b-29b8-425f-8129-ea054108a214.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/1762652579.671335", - "retrieved_timestamp": "1762652579.671336", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2498703440205322 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31561997255280577 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3301875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15558510638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/3c5ff9bc-b33a-4557-9c76-ccc041de985c.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/3c5ff9bc-b33a-4557-9c76-ccc041de985c.json deleted file mode 100644 index 8d08a071a1635dba3e9a2d1e6b6434cfe61a9505..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/3c5ff9bc-b33a-4557-9c76-ccc041de985c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/1762652579.671542", - "retrieved_timestamp": "1762652579.6715431", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.249595517670891 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31774285416798703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33148958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1566655585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/64e0c863-f33c-44d7-b244-e5288e5018fb.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/64e0c863-f33c-44d7-b244-e5288e5018fb.json deleted file mode 100644 index a1b8934c6ab48f03a3a9ac45b119b0f5b7721dac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/64e0c863-f33c-44d7-b244-e5288e5018fb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/1762652579.6717582", - "retrieved_timestamp": "1762652579.6717582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam", - "developer": 
"JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25149381419079153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3172338500122228 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15533577127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep/09f59d70-2948-4eb6-a14e-2550c97b5542.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep/09f59d70-2948-4eb6-a14e-2550c97b5542.json deleted file mode 100644 index 087019ad6afd865e431b3c5389074757cd4c109c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep/09f59d70-2948-4eb6-a14e-2550c97b5542.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep/1762652579.6576698", - "retrieved_timestamp": "1762652579.657671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2201447714286981 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3217197270809481 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33669791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17096077127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-3ep/57d9c59d-8cd8-4253-a076-8b16becc740e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-3ep/57d9c59d-8cd8-4253-a076-8b16becc740e.json deleted file mode 100644 index 61d07356b1b5c83f21169aa8372694a2b8aea366..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-3ep/57d9c59d-8cd8-4253-a076-8b16becc740e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-3ep/1762652579.671975", - "retrieved_timestamp": "1762652579.671975", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22808813946993975 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3239538094779519 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.330125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17461768617021275 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/5fb209a6-3d82-4017-8e44-3615d7c50218.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/5fb209a6-3d82-4017-8e44-3615d7c50218.json deleted file mode 100644 index 1b753d0fcc51eef0094cc7f2e8d63f787ee06caf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/5fb209a6-3d82-4017-8e44-3615d7c50218.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/1762652579.672395", - "retrieved_timestamp": "1762652579.672396", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25259311958935626 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.323809171214906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15741356382978725 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/2ccd9994-1d9c-40c4-85d0-c74af7544b6d.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/2ccd9994-1d9c-40c4-85d0-c74af7544b6d.json deleted file mode 100644 index 8b7bde6034a2246108e5af1888f3f7ddab48c945..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/2ccd9994-1d9c-40c4-85d0-c74af7544b6d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/1762652579.672603", - "retrieved_timestamp": "1762652579.6726038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24812165055386376 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31748404240871353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34752083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15965757978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1f1f5c3d-4ee4-4ed8-adeb-9e83942a7e32.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1f1f5c3d-4ee4-4ed8-adeb-9e83942a7e32.json deleted file mode 100644 index 67e79002ea5cf0622e73c3ba8a418f2d7e33be18..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1f1f5c3d-4ee4-4ed8-adeb-9e83942a7e32.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1762652579.672818", - "retrieved_timestamp": "1762652579.672818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25476624245889795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3199073234678175 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34348958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15616688829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/f9c4db8f-b56e-41cd-9c87-ba2d4b36520a.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/f9c4db8f-b56e-41cd-9c87-ba2d4b36520a.json deleted file mode 100644 index e244178345ee2c30fb0e92ea2304cb4993b3824d..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/f9c4db8f-b56e-41cd-9c87-ba2d4b36520a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/1762652579.673032", - "retrieved_timestamp": "1762652579.673033", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2423015376977531 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32193163799444524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35152083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15633311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/d1ae295e-1364-442c-a3e4-ac2ad9884a78.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/d1ae295e-1364-442c-a3e4-ac2ad9884a78.json deleted file mode 100644 index f1ccd1501f6caecfac8012aa5b34f91090272c22..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/d1ae295e-1364-442c-a3e4-ac2ad9884a78.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/1762652579.673239", - "retrieved_timestamp": "1762652579.67324", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24927082363683917 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3190945593427599 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34752083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15915890957446807 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/86c29317-7d5f-42c2-a156-615d3c4a259d.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/86c29317-7d5f-42c2-a156-615d3c4a259d.json deleted file mode 100644 index 46a3812ffdfd15b7f8703566d5fce3b576c7ee29..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/86c29317-7d5f-42c2-a156-615d3c4a259d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/1762652579.673455", - "retrieved_timestamp": "1762652579.6734562", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24779695651981187 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3218405915852565 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35152083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15558510638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep/70a5a5fb-9dd6-4b1c-a7ac-11155d5ef837.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep/70a5a5fb-9dd6-4b1c-a7ac-11155d5ef837.json deleted file mode 100644 index 421ff332f60d3d7229811c6a3bf57375f284de4b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep/70a5a5fb-9dd6-4b1c-a7ac-11155d5ef837.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep/1762652579.6721878", - "retrieved_timestamp": "1762652579.6721878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23478259905938464 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33076056644270485 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34088541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16954787234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5/047ed340-ddb8-40ca-b1ee-10f12b182e43.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5/047ed340-ddb8-40ca-b1ee-10f12b182e43.json deleted file mode 100644 index 0ca0debd8ea1eea5458f909bd90d384c29b45ad4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5/047ed340-ddb8-40ca-b1ee-10f12b182e43.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5/1762652579.65739", - "retrieved_timestamp": "1762652579.657391", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2067558522498083 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3203968601167082 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16780252659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-2ep/94b65c53-7e0c-4506-bd19-82d23709d269.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-2ep/94b65c53-7e0c-4506-bd19-82d23709d269.json deleted file mode 100644 index c1d18d78ddc56519e4ffc03f014534cc5ac151fc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-2ep/94b65c53-7e0c-4506-bd19-82d23709d269.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-2ep/1762652579.673873", - "retrieved_timestamp": "1762652579.673873", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21747186354428472 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179879277889672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33679166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.16273271276595744 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-3ep/1c779874-5568-462e-9e6e-0e3fd42d023e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-3ep/1c779874-5568-462e-9e6e-0e3fd42d023e.json deleted file mode 100644 index 473340d14764a3a3553e7900bfa5dcf9b2331058..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-3ep/1c779874-5568-462e-9e6e-0e3fd42d023e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-3ep/1762652579.674078", - "retrieved_timestamp": "1762652579.674078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2198699450790569 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32974820176156994 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35933333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1651429521276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-5ep/f562a3e4-6afe-4c1d-a597-6265af34f925.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-5ep/f562a3e4-6afe-4c1d-a597-6265af34f925.json deleted file mode 100644 index 
80a496166e2b6a629e39e5dccba9e15d0cf080af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-5ep/f562a3e4-6afe-4c1d-a597-6265af34f925.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-5ep/1762652579.674291", - "retrieved_timestamp": "1762652579.6742918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2077299343519639 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275980298873716 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15866023936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5/cdbbfad9-85e8-4c8b-b70c-708c08a62798.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5/cdbbfad9-85e8-4c8b-b70c-708c08a62798.json deleted file mode 100644 index 7141974c3296b039f4fb045ba89d0abd258352be..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5/cdbbfad9-85e8-4c8b-b70c-708c08a62798.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5/1762652579.673672", - "retrieved_timestamp": "1762652579.673672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2009856070781083 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31093810553451656 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33809375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16722074468085107 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-2ep/9cf15d33-3624-4161-bdad-069b09ab2290.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-2ep/9cf15d33-3624-4161-bdad-069b09ab2290.json deleted file mode 100644 index ce79c904c10d82f55d54181eb70d9a365c4bef71..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-2ep/9cf15d33-3624-4161-bdad-069b09ab2290.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-2ep/1762652579.674706", - "retrieved_timestamp": "1762652579.674707", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.2156234347087949 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3100411318318588 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3367291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15674867021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-3ep/658df4b3-084f-479f-b507-3a4247683651.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-3ep/658df4b3-084f-479f-b507-3a4247683651.json deleted file mode 100644 index ce4501728737b4137dcf22195bb991a7890b4f49..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-3ep/658df4b3-084f-479f-b507-3a4247683651.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-3ep/1762652579.674919", - "retrieved_timestamp": "1762652579.674919", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23805502732749106 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3199313632207049 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - 
{ - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23657718120805368 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3553645833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15217752659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-5ep/4e72cc33-538b-4fa7-8038-89794fed6511.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-5ep/4e72cc33-538b-4fa7-8038-89794fed6511.json deleted file mode 100644 index 56616abcfd4276ceb93376243e7bd7258d369cd6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-5ep/4e72cc33-538b-4fa7-8038-89794fed6511.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-5ep/1762652579.6751308", - "retrieved_timestamp": "1762652579.6751318", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21197644472222593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32002953673668666 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37127083333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1628158244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5/891bb442-c054-4941-9bd1-8352139f143e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5/891bb442-c054-4941-9bd1-8352139f143e.json deleted file mode 100644 index f9a64b5bbd934af8243e958536c9a9b4ea243200..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5/891bb442-c054-4941-9bd1-8352139f143e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5/1762652579.6744971", - "retrieved_timestamp": "1762652579.674498", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20925366915340185 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3158179005969299 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33669791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1622340425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-DPO-1epoch_v1/ac94a989-668a-49e6-9975-9169d7394574.json 
b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-DPO-1epoch_v1/ac94a989-668a-49e6-9975-9169d7394574.json deleted file mode 100644 index 2766fefd6e4f73ed2c45dcd45024621eb6e202d7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-DPO-1epoch_v1/ac94a989-668a-49e6-9975-9169d7394574.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-DPO-1epoch_v1/1762652579.67534", - "retrieved_timestamp": "1762652579.6753411", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20245947419513555 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.326814314271471 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3209166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13297872340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/6961b682-04e5-45af-bd2b-8ad6546503e7.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/6961b682-04e5-45af-bd2b-8ad6546503e7.json deleted file mode 100644 index 83916ef7b221ee639d2828076428d39668565bbe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/6961b682-04e5-45af-bd2b-8ad6546503e7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/1762652579.675586", - "retrieved_timestamp": 
"1762652579.6755872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1964144026737944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32925816453885065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13372672872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT/eb0f4662-54f5-48ca-b871-726e34bbf540.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT/eb0f4662-54f5-48ca-b871-726e34bbf540.json deleted file mode 100644 index 0045cf2a6ff456d823907cda7653dd7f6ad6ccf0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT/eb0f4662-54f5-48ca-b871-726e34bbf540.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT/1762652579.654298", - "retrieved_timestamp": "1762652579.6542988", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen2.5-0.5B-SFT", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen2.5-0.5B-SFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19636453498938372 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31207478976310743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3394270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16730385638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/e4e00595-e1ed-42c9-a518-ff104253cad9.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/e4e00595-e1ed-42c9-a518-ff104253cad9.json deleted file mode 100644 index 5ee0d0204f54d3c25c26b7ac85d1ffafda57d049..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/e4e00595-e1ed-42c9-a518-ff104253cad9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/1762652579.675801", - "retrieved_timestamp": "1762652579.675801", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25324250765746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3140431891367934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33145833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15658244680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/3a7a5a89-0ab8-47cd-95c6-14a6186e05b9.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/3a7a5a89-0ab8-47cd-95c6-14a6186e05b9.json deleted file mode 100644 index d31dd6c2a7ff251572d6bf96edd9571b3ef3b7dc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/3a7a5a89-0ab8-47cd-95c6-14a6186e05b9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/1762652579.676018", - "retrieved_timestamp": "1762652579.676018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26695612087040166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3188575312560274 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32879166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/f78ac837-d5f4-48f1-8a9e-1549b0020160.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/f78ac837-d5f4-48f1-8a9e-1549b0020160.json deleted file mode 100644 index 94ca4f4eb55036372870b7a5613f55fb60c0b3e0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/f78ac837-d5f4-48f1-8a9e-1549b0020160.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/1762652579.6762261", - "retrieved_timestamp": "1762652579.6762261", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24807178286945303 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32608064671010917 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3368229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15649933510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/2ae9cee5-8f3c-4303-802f-481a03edaf9f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/2ae9cee5-8f3c-4303-802f-481a03edaf9f.json deleted file mode 100644 index 4a11f8df732c471249a36686ce03821ea507ded3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/2ae9cee5-8f3c-4303-802f-481a03edaf9f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/1762652579.67643", - "retrieved_timestamp": "1762652579.6764312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23832985367713222 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32184656431310543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15034906914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/654b55d0-940c-43bd-9478-0bd67bb7b0d8.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/654b55d0-940c-43bd-9478-0bd67bb7b0d8.json deleted file mode 100644 index bd0090610ddb10790dc7a9038cc380b77192160f..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/654b55d0-940c-43bd-9478-0bd67bb7b0d8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/1762652579.676642", - "retrieved_timestamp": "1762652579.6766431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24714756845170813 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32244323308961736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33276041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15334109042553193 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/c23f1072-c7be-4eab-b866-16c6429071e4.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/c23f1072-c7be-4eab-b866-16c6429071e4.json deleted file mode 100644 index af75d0a66d134ef02f6eae2acaa0822b7a202af3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/c23f1072-c7be-4eab-b866-16c6429071e4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/1762652579.6768441", - "retrieved_timestamp": "1762652579.676845", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24474948691693596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3181429193838813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15649933510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/c02ad005-8e12-46d9-8bb3-090f62c6a946.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/c02ad005-8e12-46d9-8bb3-090f62c6a946.json deleted file mode 100644 index 927e5c50287f296a0f9bf2fd86734979841c6b3f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/c02ad005-8e12-46d9-8bb3-090f62c6a946.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/1762652579.677048", - "retrieved_timestamp": "1762652579.6770492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2551408041773605 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3194064593640778 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1566655585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/e1d1dd0d-ef8e-44e1-aca1-f10c53f5aa84.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/e1d1dd0d-ef8e-44e1-aca1-f10c53f5aa84.json deleted file mode 100644 index cf74787a4fd3cbaf7c35bc4ec3dfa4f1ac200928..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/e1d1dd0d-ef8e-44e1-aca1-f10c53f5aa84.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/1762652579.677404", - "retrieved_timestamp": "1762652579.677407", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25379216035674235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31530652457997205 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.326125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1583277925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/02c4e0de-4a4e-44b7-bc4c-44c92ade94ec.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/02c4e0de-4a4e-44b7-bc4c-44c92ade94ec.json deleted file mode 100644 index 87b63916c5d0ad7f145aa37164708dd044b8b06c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/02c4e0de-4a4e-44b7-bc4c-44c92ade94ec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/1762652579.677789", - "retrieved_timestamp": "1762652579.67779", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24022815019703275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3168335157841944 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.33279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1568317819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/4e38a2db-c67e-4f2a-84a0-f9afa7d32bd5.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/4e38a2db-c67e-4f2a-84a0-f9afa7d32bd5.json deleted file mode 100644 index 86e0177a854f9160a87316d52a74723f5382be7c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/4e38a2db-c67e-4f2a-84a0-f9afa7d32bd5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/1762652579.678058", - "retrieved_timestamp": "1762652579.67806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24839647690350491 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3210570160312575 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1573304521276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/77255cfb-3e18-4a3b-98a8-b0072aacb669.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/77255cfb-3e18-4a3b-98a8-b0072aacb669.json deleted file mode 100644 index 61be6349d0c260cb05054017964c9651e3afef7c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/77255cfb-3e18-4a3b-98a8-b0072aacb669.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/1762652579.6783109", - "retrieved_timestamp": "1762652579.6783118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25781371206177384 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32030958605054793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32885416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1583277925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/be9afede-e624-43e6-99dd-52e0d2b413ac.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/be9afede-e624-43e6-99dd-52e0d2b413ac.json deleted file mode 100644 index 14ecc89038b5adb271e6658adca78a9206af235d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/be9afede-e624-43e6-99dd-52e0d2b413ac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/1762652579.678605", - "retrieved_timestamp": "1762652579.678606", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23163539408768735 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3258499805340021 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.322125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15799534574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/9632892a-a6b2-4f17-827e-bfef9a712985.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/9632892a-a6b2-4f17-827e-bfef9a712985.json deleted file mode 100644 index b47e93966299c78862c037e9819b7b14af9a418b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/9632892a-a6b2-4f17-827e-bfef9a712985.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/1762652579.678855", - "retrieved_timestamp": "1762652579.678856", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23598163982677073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3225125170893353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32221875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1595744680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/a690910a-388f-4a51-98a2-fc1e1bb327e2.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/a690910a-388f-4a51-98a2-fc1e1bb327e2.json deleted file mode 100644 index 19e07b6e25a6e17c4f0f746e85af46f58010b932..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/a690910a-388f-4a51-98a2-fc1e1bb327e2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/1762652579.679086", - "retrieved_timestamp": "1762652579.679086", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23370878158840763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3132229900705577 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3235208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15325797872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/8c8eafcc-bb0f-4483-93ff-1379158a5d10.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/8c8eafcc-bb0f-4483-93ff-1379158a5d10.json deleted file mode 100644 index 7704fcced05333ef0a0289247cad149e8465b9f9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/8c8eafcc-bb0f-4483-93ff-1379158a5d10.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/1762652579.6792939", - "retrieved_timestamp": "1762652579.679295", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25693936532843964 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32760017293049276 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3155833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15649933510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/6c009b93-145d-4630-bda1-fb24bf764e7a.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/6c009b93-145d-4630-bda1-fb24bf764e7a.json deleted file mode 100644 index 74fedd3838547e8515408d6a61b70a1e825e41e7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/6c009b93-145d-4630-bda1-fb24bf764e7a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/1762652579.679507", - "retrieved_timestamp": "1762652579.679507", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24599839536873275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32674094707635526 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3209166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15433843085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1b4ccc58-920c-4089-b8ca-af3c71c5c3be.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1b4ccc58-920c-4089-b8ca-af3c71c5c3be.json deleted file mode 100644 index 7f7e027ac7e5f54e937fb28a30e57c1c2a172ecc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1b4ccc58-920c-4089-b8ca-af3c71c5c3be.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1762652579.679712", - "retrieved_timestamp": "1762652579.679712", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2529178136234081 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32292563083414066 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3195208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15965757978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/4d278257-d64b-4da7-bcd6-0d3fbee80dd8.json 
b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/4d278257-d64b-4da7-bcd6-0d3fbee80dd8.json deleted file mode 100644 index 3effa9e78cfbe4d85b8287b4d7ccfaf5b4071352..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/4d278257-d64b-4da7-bcd6-0d3fbee80dd8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/1762652579.6799219", - "retrieved_timestamp": "1762652579.679923", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25046986440422525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3255735108237258 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3194895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15990691489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/3650d718-e20a-4310-a248-3897f7713e93.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/3650d718-e20a-4310-a248-3897f7713e93.json deleted file mode 100644 index a0f5909829a0024d0b8e03224767050595163086..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/3650d718-e20a-4310-a248-3897f7713e93.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/1762652579.680135", - 
"retrieved_timestamp": "1762652579.680136", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2387044153955948 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3258394284267221 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31685416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1589095744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/6e224cd8-7f12-42a0-968e-311450d24e58.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/6e224cd8-7f12-42a0-968e-311450d24e58.json deleted file mode 100644 index 0473c073418dbb695cc4d3447b17c1be837abe83..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/6e224cd8-7f12-42a0-968e-311450d24e58.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/1762652579.6803432", - "retrieved_timestamp": "1762652579.6803432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": 
"JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25324250765746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32182747858122923 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32085416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15932513297872342 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1f17dbf3-f498-41cb-8ec0-5dabb2d9655e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1f17dbf3-f498-41cb-8ec0-5dabb2d9655e.json deleted file mode 100644 index b0aecbaa318d94cbadb926fff945b61334bfd169..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1f17dbf3-f498-41cb-8ec0-5dabb2d9655e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1762652579.680558", - "retrieved_timestamp": "1762652579.6805592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24562383365027018 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3299192088381941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.318125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16015625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/c5829ba8-e45c-4242-b308-9455f832cb58.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/c5829ba8-e45c-4242-b308-9455f832cb58.json deleted file mode 100644 index 4e154fe941783755f08853b0f07624f36c282ea2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/c5829ba8-e45c-4242-b308-9455f832cb58.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/1762652579.680775", - "retrieved_timestamp": "1762652579.680775", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam", - "developer": "JayHyeon", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24225167001334236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32712145602920534 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.318125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15949135638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_Llama-3-Instruct-8B-SimPO-v0.2/4d7428e8-41a2-4834-900e-e43b05f4d131.json b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_Llama-3-Instruct-8B-SimPO-v0.2/4d7428e8-41a2-4834-900e-e43b05f4d131.json deleted file mode 100644 index 4bca5680cb4658a1be2c9e84839b2d885b66cd6f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_Llama-3-Instruct-8B-SimPO-v0.2/4d7428e8-41a2-4834-900e-e43b05f4d131.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Jimmy19991222_Llama-3-Instruct-8B-SimPO-v0.2/1762652579.692669", - "retrieved_timestamp": "1762652579.692669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": "Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6540368444615842 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.498371102582105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40125000000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3686003989361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/9e8f395c-f481-4a64-86ee-053961b17c42.json b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/9e8f395c-f481-4a64-86ee-053961b17c42.json deleted file mode 100644 index 23519410c8201d2a8fdd2e0dba8bf702b41a52e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/9e8f395c-f481-4a64-86ee-053961b17c42.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/1762652579.6929338", - "retrieved_timestamp": "1762652579.692935", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6717221416951467 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48797965672899357 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36336436170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/913d1072-8ea3-4e0d-9d72-d30ae186dc7d.json b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/913d1072-8ea3-4e0d-9d72-d30ae186dc7d.json deleted file mode 100644 index dcdbbe169b50f91f27f6d5089b004d904e6a040b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/913d1072-8ea3-4e0d-9d72-d30ae186dc7d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.6931531", - "retrieved_timestamp": "1762652579.693154", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6555605792630221 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49345840367294164 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4000104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3657746010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/55baee54-fb05-49a1-962d-145a93de91a8.json 
b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/55baee54-fb05-49a1-962d-145a93de91a8.json deleted file mode 100644 index d080d381e70dd3ad5e78f05b9ec1910de6e282af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/55baee54-fb05-49a1-962d-145a93de91a8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.693368", - "retrieved_timestamp": "1762652579.6933692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6315055164740666 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4916414793938901 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3935 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3611203457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/601e250a-5c2f-4947-9ea3-0f903b2823ec.json b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/601e250a-5c2f-4947-9ea3-0f903b2823ec.json deleted file mode 100644 index 8544a40d8edd859e4740904fd232a9fc2cda4a9f..0000000000000000000000000000000000000000 
--- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/601e250a-5c2f-4947-9ea3-0f903b2823ec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/1762652579.69359", - "retrieved_timestamp": "1762652579.693591", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6284580468711907 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4986088445592742 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40137500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3544714095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/8ab1619c-6edf-457e-9834-0e9dc127d6a4.json b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/8ab1619c-6edf-457e-9834-0e9dc127d6a4.json deleted file mode 100644 index c35539c8d11db255003ea785b6483f6be438fae1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/8ab1619c-6edf-457e-9834-0e9dc127d6a4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/1762652579.69381", - "retrieved_timestamp": "1762652579.693811", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6677504576745258 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4940463886115545 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3987083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3657746010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/5f6d2c1e-1c66-4b1c-beed-a730d93d997f.json b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/5f6d2c1e-1c66-4b1c-beed-a730d93d997f.json deleted file mode 100644 index 3d34b669b48b24808c9806e505be49366ee7e29f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/5f6d2c1e-1c66-4b1c-beed-a730d93d997f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.69404", - "retrieved_timestamp": "1762652579.694041", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6605063453857986 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49160075581298046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4000416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3664394946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/6621f47a-13c7-421c-b054-cc9116a04e4e.json b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/6621f47a-13c7-421c-b054-cc9116a04e4e.json deleted file mode 100644 index 746499d28b799cd5a430c95728ab173c4f00ca05..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/6621f47a-13c7-421c-b054-cc9116a04e4e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.694266", - "retrieved_timestamp": "1762652579.6942668", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log", - "developer": "Jimmy19991222", - "inference_platform": "unknown", - "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.649190813707629 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4952489348573605 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3961354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37109375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/JungZoona/JungZoona_T3Q-Qwen2.5-14B-Instruct-1M-e3/464673ee-0238-40b4-9c15-1a1551b9f65c.json b/leaderboard_data/HFOpenLLMv2/JungZoona/JungZoona_T3Q-Qwen2.5-14B-Instruct-1M-e3/464673ee-0238-40b4-9c15-1a1551b9f65c.json deleted file mode 100644 index c9d621281dae5e8cb071a2bc24904aa7b05fb477..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/JungZoona/JungZoona_T3Q-Qwen2.5-14B-Instruct-1M-e3/464673ee-0238-40b4-9c15-1a1551b9f65c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JungZoona_T3Q-Qwen2.5-14B-Instruct-1M-e3/1762652579.696794", - "retrieved_timestamp": "1762652579.696794", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3", - "developer": "JungZoona", - "inference_platform": "unknown", - "id": "JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.732396707403024 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7585971930826706 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2862537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41694630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5911041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5884308510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Unknown", - "params_billions": 0.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-1415/08fcda98-72e9-4338-b2a2-6db924a47288.json b/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-1415/08fcda98-72e9-4338-b2a2-6db924a47288.json deleted file mode 100644 index 933b956d883344b68b544c47850b39f5278c740c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-1415/08fcda98-72e9-4338-b2a2-6db924a47288.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3-70b-SVA-FT-1415/1762652579.6977122", - "retrieved_timestamp": "1762652579.697713", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "KSU-HW-SEC/Llama3-70b-SVA-FT-1415", - "developer": "KSU-HW-SEC", - "inference_platform": "unknown", - "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-1415" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6179913739987677 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6650146340680478 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21978851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { 
- "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4565416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5242686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-500/4282c191-344e-4326-a80e-49b712687e7c.json b/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-500/4282c191-344e-4326-a80e-49b712687e7c.json deleted file mode 100644 index dffec2dacba3e0e30ab7abcb1c0a968e2eaf5aab..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-500/4282c191-344e-4326-a80e-49b712687e7c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3-70b-SVA-FT-500/1762652579.6980212", - "retrieved_timestamp": "1762652579.698022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "KSU-HW-SEC/Llama3-70b-SVA-FT-500", - "developer": "KSU-HW-SEC", - "inference_platform": "unknown", - "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-500" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6105223030448099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6692236023098005 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21374622356495468 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45114583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.522689494680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-final/58fe6545-2f0c-44de-a29b-2da839b141a4.json b/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-final/58fe6545-2f0c-44de-a29b-2da839b141a4.json deleted file mode 100644 index 10279916785def01a81f680b5bb1d62de56fdec0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-final/58fe6545-2f0c-44de-a29b-2da839b141a4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3-70b-SVA-FT-final/1762652579.698244", - "retrieved_timestamp": "1762652579.698245", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "KSU-HW-SEC/Llama3-70b-SVA-FT-final", - "developer": "KSU-HW-SEC", - "inference_platform": "unknown", - "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-final" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6164676391973297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6650146340680478 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21978851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4565416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5242686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3.1-70b-SVA-FT-1000step/fe896cef-7667-482d-b7f1-5361fc66ccce.json b/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3.1-70b-SVA-FT-1000step/fe896cef-7667-482d-b7f1-5361fc66ccce.json deleted 
file mode 100644 index 5d0fd4ebf2f000b88cf7a863ed9ccb5b7326b116..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3.1-70b-SVA-FT-1000step/fe896cef-7667-482d-b7f1-5361fc66ccce.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3.1-70b-SVA-FT-1000step/1762652579.698519", - "retrieved_timestamp": "1762652579.69852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step", - "developer": "KSU-HW-SEC", - "inference_platform": "unknown", - "id": "KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7238039512936785 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6903120365165111 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32099697885196377 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3959731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45917708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5251828457446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Khetterman/Khetterman_DarkAtom-12B-v3/64802b86-879e-4072-b5ad-aab17d7251f0.json b/leaderboard_data/HFOpenLLMv2/Khetterman/Khetterman_DarkAtom-12B-v3/64802b86-879e-4072-b5ad-aab17d7251f0.json deleted file mode 100644 index c8f733aa01e6b17f6b3544bcdc5841cf75c1983d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Khetterman/Khetterman_DarkAtom-12B-v3/64802b86-879e-4072-b5ad-aab17d7251f0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Khetterman_DarkAtom-12B-v3/1762652579.6987362", - "retrieved_timestamp": "1762652579.698737", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Khetterman/DarkAtom-12B-v3", - "developer": "Khetterman", - "inference_platform": "unknown", - "id": "Khetterman/DarkAtom-12B-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6173419859306639 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5153709655381875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4468020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3546376329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Khetterman/Khetterman_Kosmos-8B-v1/936cbaa1-e55b-46b8-9610-a5a8faaf4434.json b/leaderboard_data/HFOpenLLMv2/Khetterman/Khetterman_Kosmos-8B-v1/936cbaa1-e55b-46b8-9610-a5a8faaf4434.json deleted file mode 100644 index dd0ae491329289d3f45250b9bc1b8634971d14db..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Khetterman/Khetterman_Kosmos-8B-v1/936cbaa1-e55b-46b8-9610-a5a8faaf4434.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Khetterman_Kosmos-8B-v1/1762652579.6990001", - "retrieved_timestamp": "1762652579.699001", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Khetterman/Kosmos-8B-v1", - "developer": "Khetterman", - "inference_platform": "unknown", - "id": "Khetterman/Kosmos-8B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.41291107594515886 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5233522858623628 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3918854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366938164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/KingNish/KingNish_Reasoning-0.5b/98f5e59e-0bdb-405b-a18e-3addd8920951.json b/leaderboard_data/HFOpenLLMv2/KingNish/KingNish_Reasoning-0.5b/98f5e59e-0bdb-405b-a18e-3addd8920951.json deleted file mode 100644 index 42d569370844f0cff7e8771ceacd08fc132ea334..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/KingNish/KingNish_Reasoning-0.5b/98f5e59e-0bdb-405b-a18e-3addd8920951.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/KingNish_Reasoning-0.5b/1762652579.6997252", - "retrieved_timestamp": "1762652579.699726", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "KingNish/Reasoning-0.5b", - "developer": "KingNish", - "inference_platform": "unknown", - "id": "KingNish/Reasoning-0.5b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.217421995859874 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33536255853174524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35133333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16414561170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Kquant03/Kquant03_CognitiveFusion2-4x7B-BF16/66f84aee-5d79-4fec-9fff-799ac874d165.json b/leaderboard_data/HFOpenLLMv2/Kquant03/Kquant03_CognitiveFusion2-4x7B-BF16/66f84aee-5d79-4fec-9fff-799ac874d165.json deleted file mode 100644 index 7f3bee28111ee33ba19e82ddb9642ff04367adc1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Kquant03/Kquant03_CognitiveFusion2-4x7B-BF16/66f84aee-5d79-4fec-9fff-799ac874d165.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Kquant03_CognitiveFusion2-4x7B-BF16/1762652579.701032", - "retrieved_timestamp": "1762652579.7010329", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Kquant03/CognitiveFusion2-4x7B-BF16", - "developer": "Kquant03", - "inference_platform": "unknown", - "id": "Kquant03/CognitiveFusion2-4x7B-BF16" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35665700341759865 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41078286111483786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4145520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27925531914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Kquant03/Kquant03_L3-Pneuma-8B/5420d88b-bc26-4d04-9812-ffce8a3564e6.json b/leaderboard_data/HFOpenLLMv2/Kquant03/Kquant03_L3-Pneuma-8B/5420d88b-bc26-4d04-9812-ffce8a3564e6.json deleted file mode 100644 index 3fbd6031725f1b315619ab6a102a807b87f976ee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Kquant03/Kquant03_L3-Pneuma-8B/5420d88b-bc26-4d04-9812-ffce8a3564e6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Kquant03_L3-Pneuma-8B/1762652579.701272", - "retrieved_timestamp": "1762652579.7012732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Kquant03/L3-Pneuma-8B", - "developer": "Kquant03", - "inference_platform": "unknown", - "id": "Kquant03/L3-Pneuma-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2374056392593873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49550433176754827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41715624999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31840093085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Krystalan/Krystalan_DRT-o1-14B/dbd87f5e-e5ba-447b-8416-b6413c3dab09.json b/leaderboard_data/HFOpenLLMv2/Krystalan/Krystalan_DRT-o1-14B/dbd87f5e-e5ba-447b-8416-b6413c3dab09.json deleted file mode 100644 index 
8523783ec285c203cf7789fe276dccefae7a5ef6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Krystalan/Krystalan_DRT-o1-14B/dbd87f5e-e5ba-447b-8416-b6413c3dab09.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Krystalan_DRT-o1-14B/1762652579.70148", - "retrieved_timestamp": "1762652579.7014809", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Krystalan/DRT-o1-14B", - "developer": "Krystalan", - "inference_platform": "unknown", - "id": "Krystalan/DRT-o1-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4067662690549963 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.637927537514229 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4826283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47951041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5178690159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Krystalan/Krystalan_DRT-o1-7B/acb8e4cc-41b2-47ef-b819-d480189c618c.json b/leaderboard_data/HFOpenLLMv2/Krystalan/Krystalan_DRT-o1-7B/acb8e4cc-41b2-47ef-b819-d480189c618c.json deleted file mode 100644 index 2ed883920ebacda7cb2243ce983357e887dab62e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Krystalan/Krystalan_DRT-o1-7B/acb8e4cc-41b2-47ef-b819-d480189c618c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Krystalan_DRT-o1-7B/1762652579.701715", - "retrieved_timestamp": "1762652579.701716", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Krystalan/DRT-o1-7B", - "developer": "Krystalan", - "inference_platform": "unknown", - "id": "Krystalan/DRT-o1-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3928276971768242 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5467693339610741 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4478851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50865625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41514295212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralExperiment-7b-MagicCoder-v7.5/4775e169-e3a7-41b6-bf1e-a7e8e0edb4fc.json b/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralExperiment-7b-MagicCoder-v7.5/4775e169-e3a7-41b6-bf1e-a7e8e0edb4fc.json deleted file mode 100644 index f27f4601c7d69bf09c2be849baa0f51d17565955..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralExperiment-7b-MagicCoder-v7.5/4775e169-e3a7-41b6-bf1e-a7e8e0edb4fc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralExperiment-7b-MagicCoder-v7.5/1762652579.701928", - "retrieved_timestamp": "1762652579.7019289", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5", - "developer": "Kukedlc", - "inference_platform": "unknown", - "id": "Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4552509563513699 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3988446544778517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4281979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2824135638297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7B-v0.1/3d2603e3-d556-48e8-ba94-555faf9f1807.json b/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7B-v0.1/3d2603e3-d556-48e8-ba94-555faf9f1807.json deleted file mode 100644 index eec2d7492eb0ef8d6a1a5f20424056a824a029e8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7B-v0.1/3d2603e3-d556-48e8-ba94-555faf9f1807.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralSynthesis-7B-v0.1/1762652579.7026482", - "retrieved_timestamp": "1762652579.702649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Kukedlc/NeuralSynthesis-7B-v0.1", - "developer": "Kukedlc", - "inference_platform": "unknown", - "id": "Kukedlc/NeuralSynthesis-7B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184563624516283 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5144745481048844 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43328125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.304936835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7B-v0.3/b3412f38-d0bc-47c9-a750-14bdbf4e65d8.json b/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7B-v0.3/b3412f38-d0bc-47c9-a750-14bdbf4e65d8.json deleted file mode 100644 index 8ee26453d0f778fd7f6622ba81a502295a0582a6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7B-v0.3/b3412f38-d0bc-47c9-a750-14bdbf4e65d8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralSynthesis-7B-v0.3/1762652579.702864", - "retrieved_timestamp": "1762652579.702865", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Kukedlc/NeuralSynthesis-7B-v0.3", - "developer": "Kukedlc", - "inference_platform": "unknown", - "id": "Kukedlc/NeuralSynthesis-7B-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4078400865259733 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5138078814382175 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4345833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30501994680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7b-v0.4-slerp/4e30bf00-f6b7-4c28-8cf8-dc64427fb958.json b/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7b-v0.4-slerp/4e30bf00-f6b7-4c28-8cf8-dc64427fb958.json deleted file mode 100644 index 2a9b9b75891d865dcf69740116e383637f29dfe6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7b-v0.4-slerp/4e30bf00-f6b7-4c28-8cf8-dc64427fb958.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralSynthesis-7b-v0.4-slerp/1762652579.7030761", - "retrieved_timestamp": "1762652579.703077", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Kukedlc/NeuralSynthesis-7b-v0.4-slerp", - "developer": "Kukedlc", - "inference_platform": "unknown", - "id": "Kukedlc/NeuralSynthesis-7b-v0.4-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3947259936967247 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5142932549151301 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43324999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3042719414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Kumar955/Kumar955_Hemanth-llm/0787e240-a1f4-444a-b3dd-7ef1a1d394b4.json b/leaderboard_data/HFOpenLLMv2/Kumar955/Kumar955_Hemanth-llm/0787e240-a1f4-444a-b3dd-7ef1a1d394b4.json deleted file mode 100644 index 
91295717789cb7038d35483a92b5865ea806a59a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Kumar955/Kumar955_Hemanth-llm/0787e240-a1f4-444a-b3dd-7ef1a1d394b4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Kumar955_Hemanth-llm/1762652579.703545", - "retrieved_timestamp": "1762652579.703546", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Kumar955/Hemanth-llm", - "developer": "Kumar955", - "inference_platform": "unknown", - "id": "Kumar955/Hemanth-llm" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045102550122564 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.522494907014536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4485625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3112533244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/L-RAGE/L-RAGE_3_PRYMMAL-ECE-7B-SLERP-V1/02fee4d1-8899-4a93-b6f1-a1a8d251cedd.json b/leaderboard_data/HFOpenLLMv2/L-RAGE/L-RAGE_3_PRYMMAL-ECE-7B-SLERP-V1/02fee4d1-8899-4a93-b6f1-a1a8d251cedd.json deleted file mode 100644 index cbc899f7e5100cb9090e4cf371cc4ea068bb012c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/L-RAGE/L-RAGE_3_PRYMMAL-ECE-7B-SLERP-V1/02fee4d1-8899-4a93-b6f1-a1a8d251cedd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/L-RAGE_3_PRYMMAL-ECE-7B-SLERP-V1/1762652579.703805", - "retrieved_timestamp": "1762652579.703806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1", - "developer": "L-RAGE", - "inference_platform": "unknown", - "id": "L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27422572108671656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.422793974567173 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3841354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29247007978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.0-7.8B-Instruct/97f7c73d-6d69-4c04-9cff-4914253003b0.json b/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.0-7.8B-Instruct/97f7c73d-6d69-4c04-9cff-4914253003b0.json deleted file mode 100644 index 257ba699656c8cc7e1e17a3857928de51c6ab7d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.0-7.8B-Instruct/97f7c73d-6d69-4c04-9cff-4914253003b0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.0-7.8B-Instruct/1762652579.705025", - "retrieved_timestamp": "1762652579.705025", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", - "developer": "LGAI-EXAONE", - "inference_platform": "unknown", - "id": "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7192826145737754 - } - }, 
- { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4174432647784512 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30438066465256797 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35771276595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "ExaoneForCausalLM", - "params_billions": 7.8 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-2.4B-Instruct/e2a2d764-ba6b-450d-8f94-abf2af95e793.json b/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-2.4B-Instruct/e2a2d764-ba6b-450d-8f94-abf2af95e793.json deleted file mode 100644 index 261b260a3da4f800e7f2047e4efb1c494153e82e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-2.4B-Instruct/e2a2d764-ba6b-450d-8f94-abf2af95e793.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.5-2.4B-Instruct/1762652579.705282", - "retrieved_timestamp": "1762652579.7052832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct", - "developer": "LGAI-EXAONE", - "inference_platform": "unknown", - "id": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7950449252428002 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4092347113723405 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3678247734138973 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32804188829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "ExaoneForCausalLM", - "params_billions": 2.405 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-32B-Instruct/a172b1d1-6d6e-4cd9-9a85-78cb4f71661e.json b/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-32B-Instruct/a172b1d1-6d6e-4cd9-9a85-78cb4f71661e.json deleted file mode 100644 index d41a58c59979a6f8ad8aa0235aee05d628e9edb3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-32B-Instruct/a172b1d1-6d6e-4cd9-9a85-78cb4f71661e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.5-32B-Instruct/1762652579.705488", - "retrieved_timestamp": "1762652579.705489", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LGAI-EXAONE/EXAONE-3.5-32B-Instruct", - "developer": "LGAI-EXAONE", - "inference_platform": "unknown", - "id": "LGAI-EXAONE/EXAONE-3.5-32B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8391833668000904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5760913742720142 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5128398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38066666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4636801861702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "ExaoneForCausalLM", - "params_billions": 32.003 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-7.8B-Instruct/7fa474fb-4aa1-4855-9759-a28056c7a5e7.json b/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-7.8B-Instruct/7fa474fb-4aa1-4855-9759-a28056c7a5e7.json deleted file mode 100644 index 18ba8467c0d2462d7fdfe598f484a36916f1ddbf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-7.8B-Instruct/7fa474fb-4aa1-4855-9759-a28056c7a5e7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.5-7.8B-Instruct/1762652579.705873", - "retrieved_timestamp": "1762652579.705875", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct", - "developer": "LGAI-EXAONE", - "inference_platform": "unknown", - "id": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8136045692096969 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4727592304359862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47507552870090636 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3779375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4133144946808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "ExaoneForCausalLM", - "params_billions": 7.818 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LLM360/LLM360_K2-Chat/f7e7c296-74f4-49fa-946d-142341749355.json 
b/leaderboard_data/HFOpenLLMv2/LLM360/LLM360_K2-Chat/f7e7c296-74f4-49fa-946d-142341749355.json deleted file mode 100644 index 4f02e98bd16340be2e5831a8f959723c4b5daf7e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LLM360/LLM360_K2-Chat/f7e7c296-74f4-49fa-946d-142341749355.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LLM360_K2-Chat/1762652579.706591", - "retrieved_timestamp": "1762652579.706592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LLM360/K2-Chat", - "developer": "LLM360", - "inference_platform": "unknown", - "id": "LLM360/K2-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5151763986223221 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5358099630242067 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.457 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3371010638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 65.286 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LLM360/LLM360_K2/4b1e267f-90c4-403a-a7cd-5c006153408b.json b/leaderboard_data/HFOpenLLMv2/LLM360/LLM360_K2/4b1e267f-90c4-403a-a7cd-5c006153408b.json deleted file mode 100644 index ab179cbfc24273bb707c09772436eeb975f5fd8f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LLM360/LLM360_K2/4b1e267f-90c4-403a-a7cd-5c006153408b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LLM360_K2/1762652579.706215", - "retrieved_timestamp": "1762652579.7062159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": 
{ - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LLM360/K2", - "developer": "LLM360", - "inference_platform": "unknown", - "id": "LLM360/K2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2252157608478836 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4971835676523677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39799999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30044880319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 65.286 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LLM4Binary/LLM4Binary_llm4decompile-1.3b-v2/86f0a81b-69da-4f36-a6b0-8a36f79d5c1c.json b/leaderboard_data/HFOpenLLMv2/LLM4Binary/LLM4Binary_llm4decompile-1.3b-v2/86f0a81b-69da-4f36-a6b0-8a36f79d5c1c.json deleted file mode 100644 index a95191eddc57bbbf7702b21ceed1f5db1b31bc66..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LLM4Binary/LLM4Binary_llm4decompile-1.3b-v2/86f0a81b-69da-4f36-a6b0-8a36f79d5c1c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LLM4Binary_llm4decompile-1.3b-v2/1762652579.7068748", - "retrieved_timestamp": "1762652579.706877", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LLM4Binary/llm4decompile-1.3b-v2", - "developer": "LLM4Binary", - "inference_platform": "unknown", - "id": "LLM4Binary/llm4decompile-1.3b-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22678936333373229 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271808417267589 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4071770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12092752659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.346 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Langboat/Langboat_Mengzi3-8B-Chat/13e12b5c-d3bb-4634-967d-e5741e623be1.json b/leaderboard_data/HFOpenLLMv2/Langboat/Langboat_Mengzi3-8B-Chat/13e12b5c-d3bb-4634-967d-e5741e623be1.json deleted file mode 100644 index 21cc4135ea51d2e89dfe5ecfd7d08d37c261a2fa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Langboat/Langboat_Mengzi3-8B-Chat/13e12b5c-d3bb-4634-967d-e5741e623be1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Langboat_Mengzi3-8B-Chat/1762652579.707526", - "retrieved_timestamp": "1762652579.707527", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Langboat/Mengzi3-8B-Chat", - "developer": "Langboat", - "inference_platform": "unknown", - "id": "Langboat/Mengzi3-8B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.513977357854936 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4683725003203179 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31416223404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBA100/745591e3-3c6a-473a-9e51-4bffe1c86fa7.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBA100/745591e3-3c6a-473a-9e51-4bffe1c86fa7.json deleted file mode 100644 index 8146728c649ba1be9f789b4c59fbe370227b696c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBA100/745591e3-3c6a-473a-9e51-4bffe1c86fa7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBA100/1762652579.707814", - "retrieved_timestamp": "1762652579.707815", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lawnakk/BBA100", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBA100" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2075803312987318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2825701502983552 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40196875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11220079787234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.0/61739e6e-92b0-4577-acd2-8c58ffc612a4.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.0/61739e6e-92b0-4577-acd2-8c58ffc612a4.json deleted file mode 100644 index 00cefef81d6d08744f0e5a8490b4909b6e67d2fd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.0/61739e6e-92b0-4577-acd2-8c58ffc612a4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.0/1762652579.708328", - "retrieved_timestamp": "1762652579.708329", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.0", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13511482865463637 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28276697965906106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3525729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 4.353 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.2/917081cc-ee33-4c1f-85b0-9256ef57f6b3.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.2/917081cc-ee33-4c1f-85b0-9256ef57f6b3.json deleted file mode 100644 index 90250844c981d51342e5e1f2b7495de9c46db04f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.2/917081cc-ee33-4c1f-85b0-9256ef57f6b3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.2/1762652579.708597", - "retrieved_timestamp": 
"1762652579.708598", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.2", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13543952268868825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28112730419661675 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35790625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11053856382978723 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 4.353 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.3/60fa19b9-bf1d-4f39-b421-cb59379f5206.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.3/60fa19b9-bf1d-4f39-b421-cb59379f5206.json deleted file mode 100644 index 4dc5a8c3a44fcce3025d96652c2145c6c4efca6d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.3/60fa19b9-bf1d-4f39-b421-cb59379f5206.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.3/1762652579.70884", - "retrieved_timestamp": "1762652579.7088408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.3", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13543952268868825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28269808045232453 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36190625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.109375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 4.353 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.6/684962b9-d734-4a10-a0cb-45bc4d957c2c.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.6/684962b9-d734-4a10-a0cb-45bc4d957c2c.json deleted file mode 100644 index 12e48bcfb8c0c90973312cf3c0e2a975e67aa9ce..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.6/684962b9-d734-4a10-a0cb-45bc4d957c2c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.6/1762652579.7090619", - "retrieved_timestamp": "1762652579.7090628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.6", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5245437660961804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.555356284691385 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36027190332326287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { 
- "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43684375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45071476063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.61/af87bb98-cc36-4c8d-9694-7e7428a899ac.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.61/af87bb98-cc36-4c8d-9694-7e7428a899ac.json deleted file mode 100644 index 4805c16fd1f97b9e85c989a9649b37c273129f74..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.61/af87bb98-cc36-4c8d-9694-7e7428a899ac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.61/1762652579.709277", - "retrieved_timestamp": "1762652579.7092779", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.61", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.61" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5771253607095839 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5548582474785428 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36631419939577037 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4355104166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4470578457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.62/5dc300f1-e908-4d71-addc-2717e3702b12.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.62/5dc300f1-e908-4d71-addc-2717e3702b12.json deleted file mode 100644 index 61137551f62fc04076892ce09bd7756efbac24cb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.62/5dc300f1-e908-4d71-addc-2717e3702b12.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.62/1762652579.709492", - "retrieved_timestamp": "1762652579.709493", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.62", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.62" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5046099903810778 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5580519941056026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2824773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45445478723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.63/6005fc02-9f02-436a-a535-ec68a3c6dbc6.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.63/6005fc02-9f02-436a-a535-ec68a3c6dbc6.json deleted file mode 100644 index 09812cd8e58198d6129a300d752a47d437de323d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.63/6005fc02-9f02-436a-a535-ec68a3c6dbc6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ 
- "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.63/1762652579.709696", - "retrieved_timestamp": "1762652579.709697", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.63", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.63" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44073835201709244 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5540633758841665 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37009063444108764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4470578457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.64/4a4ce0f8-c41f-469e-b7c7-a4e3d857377e.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.64/4a4ce0f8-c41f-469e-b7c7-a4e3d857377e.json deleted file mode 100644 index c0798c4c89b31281e618cbe4a3a468db57b0f8a0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.64/4a4ce0f8-c41f-469e-b7c7-a4e3d857377e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.64/1762652579.709901", - "retrieved_timestamp": "1762652579.709902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lawnakk/BBALAW1.64", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1.64" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13946107439371977 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27790701865141654 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3446666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11153590425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1/59b40f56-c27f-4b15-9288-b7033e2e4f26.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1/59b40f56-c27f-4b15-9288-b7033e2e4f26.json deleted file mode 100644 index 5799c27666e905ad319b9ddff89e7e22e4c32b1b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1/59b40f56-c27f-4b15-9288-b7033e2e4f26.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1/1762652579.708089", - "retrieved_timestamp": "1762652579.70809", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lawnakk/BBALAW1", - "developer": "Lawnakk", - "inference_platform": "unknown", - "id": "Lawnakk/BBALAW1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19054442213327305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28723681696502185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4152708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11211768617021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_A/771366a5-e227-4ff8-b60f-744020994bec.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_A/771366a5-e227-4ff8-b60f-744020994bec.json deleted file mode 100644 index 7de403a376fd00ffab50931b1de7fc13fd5c9dce..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_A/771366a5-e227-4ff8-b60f-744020994bec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_A/1762652579.714355", - "retrieved_timestamp": "1762652579.714355", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/CheckPoint_A", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/CheckPoint_A" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45127927233074905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4747699745968042 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4230833333333333 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28798204787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_B/4e44fd55-9538-4065-8763-5d1c3d00be5d.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_B/4e44fd55-9538-4065-8763-5d1c3d00be5d.json deleted file mode 100644 index e51118dbead63e64d5b8a20076a8d8a70f43498a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_B/4e44fd55-9538-4065-8763-5d1c3d00be5d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_B/1762652579.7146208", - "retrieved_timestamp": "1762652579.714622", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/CheckPoint_B", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/CheckPoint_B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4439852923576111 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47799475378324896 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38984375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29072473404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_C/a4fe370d-1722-4fdf-bf75-8416baeaba19.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_C/a4fe370d-1722-4fdf-bf75-8416baeaba19.json 
deleted file mode 100644 index c6971e6f6acc70a791207ee0ba3479271ac92a82..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_C/a4fe370d-1722-4fdf-bf75-8416baeaba19.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_C/1762652579.714836", - "retrieved_timestamp": "1762652579.714837", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/CheckPoint_C", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/CheckPoint_C" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34768968558979063 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45864215446207585 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4346145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30211103723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_R1/7eba2aef-5c97-4526-92a8-d62bd5b59b6f.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_R1/7eba2aef-5c97-4526-92a8-d62bd5b59b6f.json deleted file mode 100644 index c037bf164db7ba7ade973131927a0e90acbbba74..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_R1/7eba2aef-5c97-4526-92a8-d62bd5b59b6f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_R1/1762652579.715039", - "retrieved_timestamp": "1762652579.71504", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - 
}, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/CheckPoint_R1", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/CheckPoint_R1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17278376928771216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4225419506658359 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4031458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22049534574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_001/f6b84bde-67aa-4c50-a46e-1f80605037de.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_001/f6b84bde-67aa-4c50-a46e-1f80605037de.json deleted file mode 100644 index b25a48e9a1acbdda935999992fe8e5e85cc73b95..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_001/f6b84bde-67aa-4c50-a46e-1f80605037de.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_AI_001/1762652579.7152472", - "retrieved_timestamp": "1762652579.715248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/LCARS_AI_001", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/LCARS_AI_001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31094495937445976 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42578875825590146 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43836458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2670378989361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_1x4_003_SuperAI/db8614eb-2b53-460c-a80b-dceb47a9703f.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_1x4_003_SuperAI/db8614eb-2b53-460c-a80b-dceb47a9703f.json deleted file mode 100644 index 0aec10b89420ca88d0cca01f2417a10e1e360cae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_1x4_003_SuperAI/db8614eb-2b53-460c-a80b-dceb47a9703f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_AI_1x4_003_SuperAI/1762652579.7154438", - "retrieved_timestamp": "1762652579.715445", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/LCARS_AI_1x4_003_SuperAI", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/LCARS_AI_1x4_003_SuperAI" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41111251479407973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49198503573704794 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4506145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29720744680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_StarTrek_Computer/a3e19823-43ac-44ac-9dee-960a98139fa8.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_StarTrek_Computer/a3e19823-43ac-44ac-9dee-960a98139fa8.json deleted file mode 100644 index 6ccf5e323071ef7d5bcd78d8e86a908d549d4c09..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_StarTrek_Computer/a3e19823-43ac-44ac-9dee-960a98139fa8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_AI_StarTrek_Computer/1762652579.7157388", - "retrieved_timestamp": "1762652579.715741", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/LCARS_AI_StarTrek_Computer", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/LCARS_AI_StarTrek_Computer" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35825609383103496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4446191188748297 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3950208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24584441489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_TOP_SCORE/04631aa2-f1fd-4aea-ba88-53b474c71fe8.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_TOP_SCORE/04631aa2-f1fd-4aea-ba88-53b474c71fe8.json deleted file mode 100644 index 9da6bb4508917395333c48d14a9a7e334e7adf73..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_TOP_SCORE/04631aa2-f1fd-4aea-ba88-53b474c71fe8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_TOP_SCORE/1762652579.716028", - "retrieved_timestamp": "1762652579.716029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/LCARS_TOP_SCORE", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/LCARS_TOP_SCORE" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43706587410293574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5127371051825098 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42928125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3031083776595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_Mixtral_AI_SwahiliTron_7b/4f5fadb6-5fad-4b82-a027-1d4f497dc476.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_Mixtral_AI_SwahiliTron_7b/4f5fadb6-5fad-4b82-a027-1d4f497dc476.json deleted file mode 100644 index 
f6458571cbf147905ee8705563a05c0f7c212996..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_Mixtral_AI_SwahiliTron_7b/4f5fadb6-5fad-4b82-a027-1d4f497dc476.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_Mixtral_AI_SwahiliTron_7b/1762652579.716297", - "retrieved_timestamp": "1762652579.716299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/Mixtral_AI_SwahiliTron_7b", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/Mixtral_AI_SwahiliTron_7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1533996462718919 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3055092453201354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12076130319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWebAI_Human_AGI/8e1f811e-3e86-4440-a5dd-bf607aa02ad6.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWebAI_Human_AGI/8e1f811e-3e86-4440-a5dd-bf607aa02ad6.json deleted file mode 100644 index 263b427554f3e48a57ef5579fa87f36d2621fb4b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWebAI_Human_AGI/8e1f811e-3e86-4440-a5dd-bf607aa02ad6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWebAI_Human_AGI/1762652579.7166212", - "retrieved_timestamp": "1762652579.716622", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", 
- "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWebAI_Human_AGI", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWebAI_Human_AGI" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3388221031308041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3374862127508733 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39663541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1478557180851064 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWebAI_Human_AGI_001/a4c9a905-1a7c-406a-ab38-6a5e71ed0bf5.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWebAI_Human_AGI_001/a4c9a905-1a7c-406a-ab38-6a5e71ed0bf5.json deleted file mode 100644 index a7ef6fd804e74eed8e834e3216723a23a3a270df..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWebAI_Human_AGI_001/a4c9a905-1a7c-406a-ab38-6a5e71ed0bf5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWebAI_Human_AGI_001/1762652579.716855", - "retrieved_timestamp": "1762652579.716856", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWebAI_Human_AGI_001", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWebAI_Human_AGI_001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.31181930610779396 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3433421938604874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39939583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14261968085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_CyberTron_Ultra_7b/e8b992b8-9f0a-4bfb-ab53-3b07ca1ca117.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_CyberTron_Ultra_7b/e8b992b8-9f0a-4bfb-ab53-3b07ca1ca117.json deleted file mode 100644 index 945c5e8bb1174d2f07709408284d108319f7862f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_CyberTron_Ultra_7b/e8b992b8-9f0a-4bfb-ab53-3b07ca1ca117.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_CyberTron_Ultra_7b/1762652579.71707", - "retrieved_timestamp": "1762652579.717071", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15557276914143361 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48107736108561827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41362499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2865691489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAGI_001_M2/daa704a9-2eed-4549-a847-3606c9e8a733.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAGI_001_M2/daa704a9-2eed-4549-a847-3606c9e8a733.json deleted file mode 100644 index 8681a5aeadc15a5389bf88b4220098e8605dea42..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAGI_001_M2/daa704a9-2eed-4549-a847-3606c9e8a733.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAGI_001_M2/1762652579.71728", - "retrieved_timestamp": "1762652579.717281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39395138233221183 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4888172059118469 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4503020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.300531914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAGI_002/3a6cfbae-80c1-4ec6-9c14-1ddeeb6e7138.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAGI_002/3a6cfbae-80c1-4ec6-9c14-1ddeeb6e7138.json deleted file mode 100644 index 8aec5a3d9bf6e6ac01fb4974aa3378e459885095..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAGI_002/3a6cfbae-80c1-4ec6-9c14-1ddeeb6e7138.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAGI_002/1762652579.71767", - "retrieved_timestamp": "1762652579.7176719", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAGI_002", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAGI_002" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40876430094371824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5043871825389313 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48648958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3058510638297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_001/f177b7f7-7143-4f72-9f9d-54fe2bc9797b.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_001/f177b7f7-7143-4f72-9f9d-54fe2bc9797b.json deleted file mode 100644 index a2e7d3f1cd1735ffb0c4e82e0b6abcb2fc4c9ee6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_001/f177b7f7-7143-4f72-9f9d-54fe2bc9797b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_001/1762652579.717986", - "retrieved_timestamp": "1762652579.717987", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_001", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22516589316347294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33440360243051986 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1270777925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_006/cdbebbea-4749-472b-8cec-5da5ffa96d65.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_006/cdbebbea-4749-472b-8cec-5da5ffa96d65.json deleted file mode 100644 index 8adb61d18c16d3fa1b15fad68ccc6d62a427e9f3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_006/cdbebbea-4749-472b-8cec-5da5ffa96d65.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_006/1762652579.718229", - "retrieved_timestamp": "1762652579.71823", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_006", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_006" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14300832901146734 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3301800420981355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11353058510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_007/3143a635-10da-4cb5-9c2f-eae2988d9e60.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_007/3143a635-10da-4cb5-9c2f-eae2988d9e60.json deleted file mode 100644 index f9e8ec64a0795422a9167822646d769649b36528..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_007/3143a635-10da-4cb5-9c2f-eae2988d9e60.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_007/1762652579.718461", - "retrieved_timestamp": "1762652579.718461", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_007", - "developer": "LeroyDyer", - 
"inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_007" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351751131442351 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3415665794743605 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40962499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13522273936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_009_CHAT/a6d3b7b1-8834-4b74-8849-6d80381c46f5.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_009_CHAT/a6d3b7b1-8834-4b74-8849-6d80381c46f5.json deleted file mode 100644 index 5fdb2920f524e2b4035075e27a62e4dfd19bf07f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_009_CHAT/a6d3b7b1-8834-4b74-8849-6d80381c46f5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_009_CHAT/1762652579.718692", - "retrieved_timestamp": "1762652579.718693", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2973310815303395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3306728717792965 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1432845744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_010_CHAT/7f53cef7-fba6-4802-93a2-b54f82a32d74.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_010_CHAT/7f53cef7-fba6-4802-93a2-b54f82a32d74.json deleted file mode 100644 index 6329d771b15e96cb55624cf95726ebb12046e796..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_010_CHAT/7f53cef7-fba6-4802-93a2-b54f82a32d74.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_010_CHAT/1762652579.7189271", - "retrieved_timestamp": "1762652579.7189288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2506948230694557 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33363164762455844 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41371874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14303523936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT/bc7bf4d0-45e9-4b37-8e5f-edc92fb1bd66.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT/bc7bf4d0-45e9-4b37-8e5f-edc92fb1bd66.json deleted file mode 100644 index 373b05b481cd5c29cbcd4e5bc4a31ee393dd77af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT/bc7bf4d0-45e9-4b37-8e5f-edc92fb1bd66.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT/1762652579.719242", - "retrieved_timestamp": "1762652579.719243", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3148667757106699 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3522609512356862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15949135638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/fbd83964-530c-4d0e-a305-9f8451affb23.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/fbd83964-530c-4d0e-a305-9f8451affb23.json deleted file mode 100644 index e061856aec1fa81f4166d2108319bc2f9eee562e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/fbd83964-530c-4d0e-a305-9f8451affb23.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/1762652579.719551", - "retrieved_timestamp": "1762652579.719552", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37524213531208306 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39840187861283577 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42391666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2018783244680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/10d76569-edca-47db-abf2-1d0fd73df198.json 
b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/10d76569-edca-47db-abf2-1d0fd73df198.json deleted file mode 100644 index b6e9a467c1e692293479adddf3b8b2bc22049f53..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/10d76569-edca-47db-abf2-1d0fd73df198.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/1762652579.7198021", - "retrieved_timestamp": "1762652579.7198029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4049677079039171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48583341042911066 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2956283244680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/431f8459-3c12-4260-a158-c58ec910590d.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/431f8459-3c12-4260-a158-c58ec910590d.json deleted file mode 100644 index 49ec3a51480c21b1e60aed017c8b234f2b3a41ef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/431f8459-3c12-4260-a158-c58ec910590d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/1762652579.720226", - "retrieved_timestamp": "1762652579.720227", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30664858131978706 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45768864760562744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23179853723404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bcd8c141-d286-4567-bb06-934e546a5c7c.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bcd8c141-d286-4567-bb06-934e546a5c7c.json deleted file mode 100644 index 8378dc34129ad99920482f6424adc3551a490e3c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bcd8c141-d286-4567-bb06-934e546a5c7c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/1762652579.720018", - "retrieved_timestamp": "1762652579.7200189", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30355124403250044 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4575107149412439 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42534374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23287898936170212 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/9cc77018-d090-4202-bcf5-d0031097b84e.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/9cc77018-d090-4202-bcf5-d0031097b84e.json deleted file mode 100644 index a155f454f3a8678545ead5a3900ce9c10c6e9b63..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/9cc77018-d090-4202-bcf5-d0031097b84e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/1762652579.7204201", - "retrieved_timestamp": "1762652579.720421", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3065987136353764 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3158421938604874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34438541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11070478723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0b365c44-3cc2-4149-8614-7de6b6c2581d.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0b365c44-3cc2-4149-8614-7de6b6c2581d.json deleted file mode 100644 index 56fc9fd4a3d26997e6ba676f98d9ebce6cbc5684..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0b365c44-3cc2-4149-8614-7de6b6c2581d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/1762652579.72064", - "retrieved_timestamp": "1762652579.7206411", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35788153211257245 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4476544560399054 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - 
} - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41340625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23761635638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/dc90b971-313a-4a76-b042-350adf37a43c.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/dc90b971-313a-4a76-b042-350adf37a43c.json deleted file mode 100644 index 21577c849a3bd847880bc8bded5a033270c8805b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/dc90b971-313a-4a76-b042-350adf37a43c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/1762652579.720855", - "retrieved_timestamp": "1762652579.720855", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37976347203198624 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44827466097749213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4148020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2388630319148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_RP/a4a38b96-036f-40db-8a0b-024a36f004f5.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_RP/a4a38b96-036f-40db-8a0b-024a36f004f5.json deleted file mode 100644 index a674eb7083fa600436c83c75cf3030e0330983c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_RP/a4a38b96-036f-40db-8a0b-024a36f004f5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_RP/1762652579.721039", - "retrieved_timestamp": "1762652579.7210398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_RP", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2541168543907942 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33230179059744286 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1323969414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_TextVision/558a0ed7-a667-421e-bbab-094b46274239.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_TextVision/558a0ed7-a667-421e-bbab-094b46274239.json deleted file mode 100644 index 29293bcfb6b01b7f105f9086081d826bf6280d2a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_TextVision/558a0ed7-a667-421e-bbab-094b46274239.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_TextVision/1762652579.7212439", - "retrieved_timestamp": "1762652579.7212448", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_AI_HumanAI_TextVision", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_AI_HumanAI_TextVision" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062740196013245 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33536617928965984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13871343085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M1/ee856df0-01ea-4f06-9323-951144c9e82f.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M1/ee856df0-01ea-4f06-9323-951144c9e82f.json deleted file mode 100644 index 764f11cd83e516b0575f9628c54f39c16a879d40..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M1/ee856df0-01ea-4f06-9323-951144c9e82f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_HumanAI_M1/1762652579.721453", - "retrieved_timestamp": "1762652579.721453", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_HumanAI_M1", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_HumanAI_M1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3582062261466243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35632705798398107 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36711458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1663065159574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M2/4ea0436d-6ec9-40db-af56-2f7f1b0317df.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M2/4ea0436d-6ec9-40db-af56-2f7f1b0317df.json deleted file mode 100644 index 65bd79fbbd2e41c157ea40638fcb4bc95a44b0d6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M2/4ea0436d-6ec9-40db-af56-2f7f1b0317df.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_HumanAI_M2/1762652579.7216609", - "retrieved_timestamp": "1762652579.721662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_HumanAI_M2", - "developer": "LeroyDyer", - "inference_platform": 
"unknown", - "id": "LeroyDyer/SpydazWeb_HumanAI_M2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3750171766468526 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39308772552915555 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3751458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2010472074468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M3/d5dd0be3-e7a7-4636-b513-3c1d5532807f.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M3/d5dd0be3-e7a7-4636-b513-3c1d5532807f.json deleted file mode 100644 index a93e513f28a5d8519ea0eb8e34cf952196f6ab27..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M3/d5dd0be3-e7a7-4636-b513-3c1d5532807f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_HumanAI_M3/1762652579.721856", - "retrieved_timestamp": "1762652579.721857", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/SpydazWeb_HumanAI_M3", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/SpydazWeb_HumanAI_M3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1578711153073844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.31272572546166244 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11486037234042554 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_12/b4b57280-49db-4a07-929f-dbe2f222250c.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_12/b4b57280-49db-4a07-929f-dbe2f222250c.json deleted file mode 100644 index b9d90ecfc6e32d202e4290d1db45d44395280484..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_12/b4b57280-49db-4a07-929f-dbe2f222250c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_12/1762652579.722054", - "retrieved_timestamp": "1762652579.722055", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_12", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_12" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2764985793250797 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31633960292107943 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35815624999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11369680851063829 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_14/6233aac6-0ce3-4f3c-8ee0-87d2482d3ea2.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_14/6233aac6-0ce3-4f3c-8ee0-87d2482d3ea2.json deleted file mode 100644 index ffb71597de3ac296fbfb1225c506ba9a3a30ad25..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_14/6233aac6-0ce3-4f3c-8ee0-87d2482d3ea2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_14/1762652579.722256", - "retrieved_timestamp": "1762652579.722257", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_14", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_14" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1811770546594148 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2988848127354542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3395208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11394614361702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 
7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_001/51d4724b-c85c-4ad4-a4bd-9be93cd99a2a.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_001/51d4724b-c85c-4ad4-a4bd-9be93cd99a2a.json deleted file mode 100644 index a4a80808ffc4af6f2d8d05dbd51ca8d55d9b696a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_001/51d4724b-c85c-4ad4-a4bd-9be93cd99a2a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_001/1762652579.72245", - "retrieved_timestamp": "1762652579.722451", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_001", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4505046609662362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4609124425176902 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42559375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2734375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_002/86e8ff02-0dd2-4023-ab18-359d24a8a4fd.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_002/86e8ff02-0dd2-4023-ab18-359d24a8a4fd.json deleted file mode 100644 index 962a21159da06680d158574e9850943971ddf598..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_002/86e8ff02-0dd2-4023-ab18-359d24a8a4fd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_002/1762652579.7226508", - "retrieved_timestamp": "1762652579.7226508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_002", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_002" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5306885729863429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4682582050072746 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42546875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28939494680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_MUSR/285688d5-c7ad-437b-a54c-9e6108d85267.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_MUSR/285688d5-c7ad-437b-a54c-9e6108d85267.json deleted file mode 100644 index b4eb63e3060756d8e946cfce79b2014aa64cd7de..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_MUSR/285688d5-c7ad-437b-a54c-9e6108d85267.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_MUSR/1762652579.722848", - "retrieved_timestamp": "1762652579.7228491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR", - 
"developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.478606763387811 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4671769411194033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48689583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2828291223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_MasterCoder/85ce2909-a5f9-413a-8719-cd0a66874535.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_MasterCoder/85ce2909-a5f9-413a-8719-cd0a66874535.json deleted file mode 100644 index d62a462cfaa12b8eb914dc4c9396938e81633bbc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_MasterCoder/85ce2909-a5f9-413a-8719-cd0a66874535.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_MasterCoder/1762652579.723048", - "retrieved_timestamp": "1762652579.723048", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.414259719765777 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4689417813020516 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47197916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27194148936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_001/8a7df636-f1bb-4a74-bb7f-8a412edf6bd1.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_001/8a7df636-f1bb-4a74-bb7f-8a412edf6bd1.json deleted file mode 100644 index 60608b24da32629eff454f22e6fa8e3cd1a559c9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_001/8a7df636-f1bb-4a74-bb7f-8a412edf6bd1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_001/1762652579.723258", - "retrieved_timestamp": "1762652579.723258", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4571492528712705 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48178882135920675 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47784375000000007 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2681183510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_003/79336acd-d465-4938-af7f-f7a688f46fd4.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_003/79336acd-d465-4938-af7f-f7a688f46fd4.json deleted file mode 100644 index c6f0eb77c8dd55b878bfecb1cecc1b83393b2698..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_003/79336acd-d465-4938-af7f-f7a688f46fd4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_003/1762652579.723467", - "retrieved_timestamp": "1762652579.723468", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6200148938150774 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4755509035158693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42019791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29986702127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/ed000ee0-4193-46c4-8114-2ea3dbfec9f7.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/ed000ee0-4193-46c4-8114-2ea3dbfec9f7.json deleted file mode 100644 index d58852002bc1a559b272341486119e45150acf46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/ed000ee0-4193-46c4-8114-2ea3dbfec9f7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/1762652579.7236722", - "retrieved_timestamp": "1762652579.7236722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5950854842927876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4927473238025393 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5198229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2999501329787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Student/89f92d24-19c1-4021-819d-9c7ed717046c.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Student/89f92d24-19c1-4021-819d-9c7ed717046c.json deleted file mode 100644 index 88b3dd72bf44d431420c7d7dccbdd5ac15557c47..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Student/89f92d24-19c1-4021-819d-9c7ed717046c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Student/1762652579.723874", - "retrieved_timestamp": "1762652579.723874", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5735781060918363 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48808115770970123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.292719414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Teacher/24fa44cb-86d9-4e67-be8f-42f7fc574d52.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Teacher/24fa44cb-86d9-4e67-be8f-42f7fc574d52.json deleted file mode 100644 index 703f7effeafc32ae39bbd77145b6cbdf001f8f4b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Teacher/24fa44cb-86d9-4e67-be8f-42f7fc574d52.json +++ /dev/null @@ 
-1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Teacher/1762652579.7241092", - "retrieved_timestamp": "1762652579.7241101", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5772250960784053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4805094960871836 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5222395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2956283244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_001/b13652e3-43f1-4670-94f7-1a0bbf622f33.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_001/b13652e3-43f1-4670-94f7-1a0bbf622f33.json deleted file mode 100644 index 0ecf40f5dd591cdc2141eda4537e1833047e799f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_001/b13652e3-43f1-4670-94f7-1a0bbf622f33.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_001/1762652579.72431", - "retrieved_timestamp": "1762652579.724311", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5817963004827191 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4907982146977475 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29055851063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_002/8201723e-92fb-4207-afa8-df7db794c889.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_002/8201723e-92fb-4207-afa8-df7db794c889.json deleted file mode 100644 index 4d5bb7c8030a39c71baac5e3b4301e1fe3f6246a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_002/8201723e-92fb-4207-afa8-df7db794c889.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_002/1762652579.7245262", - "retrieved_timestamp": "1762652579.7245262", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.546150879665953 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4655028607746287 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45108333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28665226063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Coder/e166fa17-c285-466e-ab2e-1eb106ebd271.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Coder/e166fa17-c285-466e-ab2e-1eb106ebd271.json deleted file mode 100644 index 7e36131297ac1f226585c4a87bb750dd9563fda9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Coder/e166fa17-c285-466e-ab2e-1eb106ebd271.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Coder/1762652579.724742", - "retrieved_timestamp": "1762652579.724742", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4923702442851634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46376531085099754 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5624583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28897938829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Math/983323f2-7caa-42cb-8838-8ea041303a70.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Math/983323f2-7caa-42cb-8838-8ea041303a70.json deleted file mode 100644 index af1ee1558fc6274e1c9537a1a6afdbef412db740..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Math/983323f2-7caa-42cb-8838-8ea041303a70.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Math/1762652579.7249558", - "retrieved_timestamp": "1762652579.724957", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033112142448702 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4676503002757066 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325729166666667 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29130651595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_MathMaster/a79378f7-01b3-4bf0-8b76-2e670d2a7366.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_MathMaster/a79378f7-01b3-4bf0-8b76-2e670d2a7366.json deleted file mode 100644 index 577f533c5aa37e1cde866ade78e86ad2335a1b30..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_MathMaster/a79378f7-01b3-4bf0-8b76-2e670d2a7366.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_MathMaster/1762652579.7251709", - "retrieved_timestamp": "1762652579.7251709", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5558429411738631 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47422312505675873 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45098958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2672041223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Student_Coder/1e7531fc-9f12-4c7c-8bf5-44511c37c23b.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Student_Coder/1e7531fc-9f12-4c7c-8bf5-44511c37c23b.json deleted file mode 100644 index 96e6f8469f1088b80d6ee5f245ba9e4f0c65a1de..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Student_Coder/1e7531fc-9f12-4c7c-8bf5-44511c37c23b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Student_Coder/1762652579.725384", - "retrieved_timestamp": "1762652579.725385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5449518388985669 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4650844324968853 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43883333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684507978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Teacher_Coder/64c0088b-f9e7-4a9a-b449-3e1b514370ff.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Teacher_Coder/64c0088b-f9e7-4a9a-b449-3e1b514370ff.json deleted file mode 100644 index 4a003e76295bc168a266b0acff23311005840d53..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Teacher_Coder/64c0088b-f9e7-4a9a-b449-3e1b514370ff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Teacher_Coder/1762652579.7256", - "retrieved_timestamp": "1762652579.725601", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5081572449988254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47965526444811907 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4338125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28449135638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Top_Student/d652c8f6-d5b4-482f-91c7-5eb9529765c1.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Top_Student/d652c8f6-d5b4-482f-91c7-5eb9529765c1.json deleted file mode 100644 index b2e272a297b01a2fbbf11e4f9eddfbdaded28090..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Top_Student/d652c8f6-d5b4-482f-91c7-5eb9529765c1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Top_Student/1762652579.725811", - "retrieved_timestamp": "1762652579.725811", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6039530667517742 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49877449828070924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5397916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30244348404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_X1/7c72e837-92fd-4f3b-9c4f-205ffc93ac70.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_X1/7c72e837-92fd-4f3b-9c4f-205ffc93ac70.json deleted file mode 100644 index 48ee4f7d4d318c0436820acfbada8c9c2299a83e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_X1/7c72e837-92fd-4f3b-9c4f-205ffc93ac70.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_X1/1762652579.7260191", - "retrieved_timestamp": "1762652579.72602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.427323944910615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47589342126093026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4231770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2890625 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_X2/169fe3b3-527a-408f-9442-5bc3616cc320.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_X2/169fe3b3-527a-408f-9442-5bc3616cc320.json deleted file mode 100644 index 1ce8b240c0cc19077c7e03a0c5b3fcbdd713c03c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_X2/169fe3b3-527a-408f-9442-5bc3616cc320.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_X2/1762652579.7262201", - "retrieved_timestamp": "1762652579.726221", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5433782364127182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4785559277736029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46953125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29205452127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_RP_R1/fd4405cf-9849-4606-a01c-a20459198853.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_RP_R1/fd4405cf-9849-4606-a01c-a20459198853.json deleted file mode 100644 index b572d74c3db88a25ecf1db854eea6a7ea7ce54be..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_RP_R1/fd4405cf-9849-4606-a01c-a20459198853.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_RP_R1/1762652579.726439", - "retrieved_timestamp": "1762652579.72644", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5426036250482054 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4701061648636955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42013541666666665 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28939494680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_BIBLE_002/060f29d1-8b1d-4651-808d-b1419bd76cd9.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_BIBLE_002/060f29d1-8b1d-4651-808d-b1419bd76cd9.json deleted file mode 100644 index face45a369d427553c0af681116befec2257fe47..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_BIBLE_002/060f29d1-8b1d-4651-808d-b1419bd76cd9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_BIBLE_002/1762652579.72666", - "retrieved_timestamp": "1762652579.7266612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_BIBLE_002", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_BIBLE_002" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21949538336059432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289070186514165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34069791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13680186170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatML_002/07981f28-b019-42f8-b14b-44ab73ebaa0a.json 
b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatML_002/07981f28-b019-42f8-b14b-44ab73ebaa0a.json deleted file mode 100644 index abb87a72ea478eefc4a423c4e12af2b1f5e82e96..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatML_002/07981f28-b019-42f8-b14b-44ab73ebaa0a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_ChatML_002/1762652579.7268748", - "retrieved_timestamp": "1762652579.726876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_ChatML_002", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_ChatML_002" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24122772022677608 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3106383598957094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3623125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10945811170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatQA/4e72d3b7-4ebb-470d-8f86-66d6cb28095f.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatQA/4e72d3b7-4ebb-470d-8f86-66d6cb28095f.json deleted file mode 100644 index 04e94baff056599ad8a51ee6df4ccb5c8f33911f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatQA/4e72d3b7-4ebb-470d-8f86-66d6cb28095f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_ChatQA/1762652579.727107", - "retrieved_timestamp": "1762652579.727108", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_ChatQA", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_ChatQA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1414591062824417 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32359493837413505 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14752327127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatQA_003/471aac2a-5c4b-4b1b-a56b-490fafc444d8.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatQA_003/471aac2a-5c4b-4b1b-a56b-490fafc444d8.json deleted file mode 100644 index 14a58bd00cdfd509b0d8e54d6282e619635d0b69..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatQA_003/471aac2a-5c4b-4b1b-a56b-490fafc444d8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_ChatQA_003/1762652579.727351", - "retrieved_timestamp": "1762652579.7273521", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_ChatQA_003", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_ChatQA_003" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22091938279321088 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3171811407815537 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38184375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11328125 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_TEMP_/f44f513c-0814-4f3b-94a4-9e28318da40e.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_TEMP_/f44f513c-0814-4f3b-94a4-9e28318da40e.json deleted file mode 100644 index 8c5d3b3697ff75a485ddb90e620ab4293f45e63d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_TEMP_/f44f513c-0814-4f3b-94a4-9e28318da40e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_TEMP_/1762652579.7275891", - "retrieved_timestamp": "1762652579.7275898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_TEMP_", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_TEMP_" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47953097780555587 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.495695749059555 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42175 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3120844414893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_Top_Teacher_/a4beba0f-b860-4d7d-b1c3-0f569ba59171.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_Top_Teacher_/a4beba0f-b860-4d7d-b1c3-0f569ba59171.json deleted file mode 100644 index af98c2e3d5a9792c3795fd0c3b8dc70c8492fcbb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_Top_Teacher_/a4beba0f-b860-4d7d-b1c3-0f569ba59171.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_Top_Teacher_/1762652579.728002", - "retrieved_timestamp": "1762652579.728004", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LeroyDyer/_Spydaz_Web_AI_Top_Teacher_", - "developer": "LeroyDyer", - "inference_platform": "unknown", - "id": "LeroyDyer/_Spydaz_Web_AI_Top_Teacher_" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44038817005545283 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48909617780536035 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4366041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3149933510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.0/cd4408c3-d966-4195-bcf2-5bc80eca1501.json b/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.0/cd4408c3-d966-4195-bcf2-5bc80eca1501.json deleted file mode 100644 index 8cc529ce4a3f4cb635343f7a415d521d3698fffe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.0/cd4408c3-d966-4195-bcf2-5bc80eca1501.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LightningRodLabs_Flashlight-v1.0/1762652579.7282822", - "retrieved_timestamp": "1762652579.728283", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LightningRodLabs/Flashlight-v1.0", - "developer": "LightningRodLabs", - "inference_platform": "unknown", - "id": "LightningRodLabs/Flashlight-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6745446526327921 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6876833310149727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49697885196374625 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41009375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5402260638297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.1/64c75370-981d-43ae-9823-d4fb0696d468.json b/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.1/64c75370-981d-43ae-9823-d4fb0696d468.json deleted file mode 100644 index 0e4b603705f422bf366f7dab0bdfc330371dcf55..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.1/64c75370-981d-43ae-9823-d4fb0696d468.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LightningRodLabs_Flashlight-v1.1/1762652579.728596", - "retrieved_timestamp": "1762652579.728597", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LightningRodLabs/Flashlight-v1.1", - "developer": "LightningRodLabs", - "inference_platform": "unknown", - "id": "LightningRodLabs/Flashlight-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6720967034136092 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6901141327534415 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4047604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5415558510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.2/404afbae-0393-48e6-874c-e1cb28e9a1eb.json b/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.2/404afbae-0393-48e6-874c-e1cb28e9a1eb.json deleted file mode 100644 index e4fb3456449485ee7f65475b0257ca55a4ba1454..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.2/404afbae-0393-48e6-874c-e1cb28e9a1eb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/LightningRodLabs_Flashlight-v1.2/1762652579.728818", - "retrieved_timestamp": "1762652579.728819", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LightningRodLabs/Flashlight-v1.2", - "developer": "LightningRodLabs", - "inference_platform": "unknown", - "id": "LightningRodLabs/Flashlight-v1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4359920566319587 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3264526807518731 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555891238670695 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45536458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24850398936170212 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V1/d53a7070-911a-4a5e-ba0c-766c4f39b3f5.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V1/d53a7070-911a-4a5e-ba0c-766c4f39b3f5.json deleted file mode 100644 index e942db1002d0613d4a765cfbfe0e45a52242e989..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V1/d53a7070-911a-4a5e-ba0c-766c4f39b3f5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V1/1762652579.7290292", - "retrieved_timestamp": "1762652579.72903", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1", - "developer": "Lil-R", - "inference_platform": 
"unknown", - "id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5823459531820016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4287069505821554 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667674 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2677859042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V2/25368664-1f32-4d69-9afc-91d58efd01e2.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V2/25368664-1f32-4d69-9afc-91d58efd01e2.json deleted file mode 100644 index c7153e4d8cc73e0e2132dcb946c1b4f4b2054c3b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V2/25368664-1f32-4d69-9afc-91d58efd01e2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V2/1762652579.729285", - "retrieved_timestamp": "1762652579.729285", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2", - "developer": "Lil-R", - "inference_platform": "unknown", - "id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5542693386880144 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43764741906109417 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44816666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2744348404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V1/dcadbfb3-fbeb-4108-bc27-7ccfc7ba1e3a.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V1/dcadbfb3-fbeb-4108-bc27-7ccfc7ba1e3a.json deleted file mode 100644 index 4c2b872ed4fb08c3e487bf36e812faac312891ae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V1/dcadbfb3-fbeb-4108-bc27-7ccfc7ba1e3a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V1/1762652579.7297568", - "retrieved_timestamp": "1762652579.7297568", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1", - "developer": "Lil-R", - "inference_platform": "unknown", - "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10733742026711349 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30525797550329686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11236702127659574 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V2/41c47381-66d5-4d3a-8bfb-4269cb882385.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V2/41c47381-66d5-4d3a-8bfb-4269cb882385.json deleted file mode 100644 index 2a4c35a11ae53745193fee295ffcaedde2cc98cf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V2/41c47381-66d5-4d3a-8bfb-4269cb882385.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V2/1762652579.729984", - "retrieved_timestamp": "1762652579.729985", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2", - "developer": "Lil-R", - "inference_platform": "unknown", - "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10733742026711349 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30525797550329686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11236702127659574 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} 
\ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V3/0c21359f-8f0b-44a8-813e-a5f612f13658.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V3/0c21359f-8f0b-44a8-813e-a5f612f13658.json deleted file mode 100644 index 1067477e2716da340c031f7e814593eea10496d3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V3/0c21359f-8f0b-44a8-813e-a5f612f13658.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V3/1762652579.730203", - "retrieved_timestamp": "1762652579.730203", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3", - "developer": "Lil-R", - "inference_platform": "unknown", - "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22346706738121516 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.357839880712804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4107083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18168218085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP/aa396cb3-10aa-4777-a185-fcb38ffc5ec3.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP/aa396cb3-10aa-4777-a185-fcb38ffc5ec3.json deleted file mode 100644 index f87fcf58b3a740d17e12d4f0534a3008922243e1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP/aa396cb3-10aa-4777-a185-fcb38ffc5ec3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP/1762652579.7294989", - "retrieved_timestamp": "1762652579.7294998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP", - "developer": "Lil-R", - "inference_platform": "unknown", - "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5577412376937636 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5556642048146725 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3632930513595166 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43960416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45071476063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_PRYMMAL-ECE-1B-SLERP-V1/a863e655-ee86-4f39-ae1a-0a65992f7eb4.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_PRYMMAL-ECE-1B-SLERP-V1/a863e655-ee86-4f39-ae1a-0a65992f7eb4.json deleted file mode 100644 index 97184e57e16153c3c37faff35b871316c746a80d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_PRYMMAL-ECE-1B-SLERP-V1/a863e655-ee86-4f39-ae1a-0a65992f7eb4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lil-R_PRYMMAL-ECE-1B-SLERP-V1/1762652579.7304142", - "retrieved_timestamp": "1762652579.730415", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lil-R/PRYMMAL-ECE-1B-SLERP-V1", - "developer": "Lil-R", - "inference_platform": "unknown", - "id": "Lil-R/PRYMMAL-ECE-1B-SLERP-V1" - 
}, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2874395492847866 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41904526564708194 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39743749999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2925531914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_PRYMMAL-ECE-7B-SLERP-V8/6a81c514-57b9-4a45-9a1a-0378e7554d04.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_PRYMMAL-ECE-7B-SLERP-V8/6a81c514-57b9-4a45-9a1a-0378e7554d04.json deleted file mode 100644 index 480fc05d2fa3b09f04b51751eec1effa2d523da7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_PRYMMAL-ECE-7B-SLERP-V8/6a81c514-57b9-4a45-9a1a-0378e7554d04.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lil-R_PRYMMAL-ECE-7B-SLERP-V8/1762652579.7306318", - "retrieved_timestamp": "1762652579.730633", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lil-R/PRYMMAL-ECE-7B-SLERP-V8", - "developer": "Lil-R", - "inference_platform": "unknown", - "id": "Lil-R/PRYMMAL-ECE-7B-SLERP-V8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1258471965495995 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2955092966258663 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36314583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_10PRYMMAL-3B-slerp/e9371530-675d-48d1-9145-7ea15c893833.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_10PRYMMAL-3B-slerp/e9371530-675d-48d1-9145-7ea15c893833.json deleted file mode 100644 index 4ccb0fe089fdafea2ac5c4de68ad55dca6e7e780..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_10PRYMMAL-3B-slerp/e9371530-675d-48d1-9145-7ea15c893833.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LilRg_10PRYMMAL-3B-slerp/1762652579.7308428", - "retrieved_timestamp": "1762652579.7308428", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LilRg/10PRYMMAL-3B-slerp", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/10PRYMMAL-3B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1945903535951276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5320377091634505 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14954682779456194 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.45290625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3881316489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_ECE-1B-merge-PRYMMAL/3fefac8e-d5aa-4998-ab60-6e3dcc49f77f.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_ECE-1B-merge-PRYMMAL/3fefac8e-d5aa-4998-ab60-6e3dcc49f77f.json deleted file mode 100644 index f2f2261079f99786967bf4af7b8b8d55433abe06..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_ECE-1B-merge-PRYMMAL/3fefac8e-d5aa-4998-ab60-6e3dcc49f77f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LilRg_ECE-1B-merge-PRYMMAL/1762652579.7310941", - "retrieved_timestamp": "1762652579.731095", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LilRg/ECE-1B-merge-PRYMMAL", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/ECE-1B-merge-PRYMMAL" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27122811916825135 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42345600176908743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3801041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2906416223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_ECE_Finetunning/f20fd926-d690-4fe2-80a4-3e79dc37f03f.json 
b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_ECE_Finetunning/f20fd926-d690-4fe2-80a4-3e79dc37f03f.json deleted file mode 100644 index 1b0c45574a0b6405ec001a7fc12835464ac7cccd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_ECE_Finetunning/f20fd926-d690-4fe2-80a4-3e79dc37f03f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LilRg_ECE_Finetunning/1762652579.731307", - "retrieved_timestamp": "1762652579.731308", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LilRg/ECE_Finetunning", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/ECE_Finetunning" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04453849120334047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47321596790730514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38394791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3191489361702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 16.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-6B-slerp/8fedde0a-96fe-4a6f-9e0f-87832cfd418e.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-6B-slerp/8fedde0a-96fe-4a6f-9e0f-87832cfd418e.json deleted file mode 100644 index dfa2ca042424a09593dc4a5db5345eb3d0906b19..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-6B-slerp/8fedde0a-96fe-4a6f-9e0f-87832cfd418e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-6B-slerp/1762652579.731526", - "retrieved_timestamp": "1762652579.7315269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF 
Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LilRg/PRYMMAL-6B-slerp", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/PRYMMAL-6B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11533065599276586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28676215692036117 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1107878989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.293 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V3/a656eacf-8134-446c-8417-e1c3c54fe941.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V3/a656eacf-8134-446c-8417-e1c3c54fe941.json deleted file mode 100644 index 94d56548c9a95bcc83c4c782eb9c14abeabece54..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V3/a656eacf-8134-446c-8417-e1c3c54fe941.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V3/1762652579.731744", - "retrieved_timestamp": "1762652579.731745", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V3", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12432346174816154 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2957239084980124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36714583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V4/0d276bd3-a338-4383-88b0-9e653ae01387.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V4/0d276bd3-a338-4383-88b0-9e653ae01387.json deleted file mode 100644 index 22d46842f1a77a691c1989406191575c0ceda57a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V4/0d276bd3-a338-4383-88b0-9e653ae01387.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V4/1762652579.731953", - "retrieved_timestamp": "1762652579.7319539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V4", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12492298213185458 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2957239084980124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36714583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V5/150d0730-e194-4d2b-96e1-54f914b5fe28.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V5/150d0730-e194-4d2b-96e1-54f914b5fe28.json deleted file mode 100644 index 372e6d6b2e1b67e0829d0b8432c02510d7d6fc43..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V5/150d0730-e194-4d2b-96e1-54f914b5fe28.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V5/1762652579.7321632", - "retrieved_timestamp": "1762652579.7321641", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V5", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12492298213185458 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2957239084980124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36714583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, 
- "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V6/b23913b9-f774-4927-be16-874d8e146218.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V6/b23913b9-f774-4927-be16-874d8e146218.json deleted file mode 100644 index bb830035778e94c2c97a998f5be4f2432acfef52..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V6/b23913b9-f774-4927-be16-874d8e146218.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V6/1762652579.732379", - "retrieved_timestamp": "1762652579.732379", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V6", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12432346174816154 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2957239084980124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36714583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V7/dd12d7df-9b32-4d2a-ae9a-40304cf4bfd7.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V7/dd12d7df-9b32-4d2a-ae9a-40304cf4bfd7.json deleted file mode 100644 index 955a38e0f409372ddb41f93fb7e3b7f7338bb58a..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V7/dd12d7df-9b32-4d2a-ae9a-40304cf4bfd7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V7/1762652579.732605", - "retrieved_timestamp": "1762652579.732606", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V7", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12492298213185458 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2957239084980124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36714583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-slerp-Merge/9574abe0-00e3-4e38-bda0-b217f002a480.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-slerp-Merge/9574abe0-00e3-4e38-bda0-b217f002a480.json deleted file mode 100644 index 5c0530fcadfa8ec9cec51ab5f4312454e5e02d9e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-slerp-Merge/9574abe0-00e3-4e38-bda0-b217f002a480.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-slerp-Merge/1762652579.732816", - "retrieved_timestamp": "1762652579.732817", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "LilRg/PRYMMAL-slerp-Merge", - "developer": "LilRg", - "inference_platform": "unknown", - "id": "LilRg/PRYMMAL-slerp-Merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.304400102838247 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364156271768925 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16163141993957703 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46347916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3863031914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/LimYeri/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/d020a655-1cc0-49e9-9db1-f8b871babd5c.json b/leaderboard_data/HFOpenLLMv2/LimYeri/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/d020a655-1cc0-49e9-9db1-f8b871babd5c.json deleted file mode 100644 index 750e9ad2171dbaec5f1e35e91891d1bb938057a7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/LimYeri/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/d020a655-1cc0-49e9-9db1-f8b871babd5c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/1762652579.733827", - "retrieved_timestamp": "1762652579.733829", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged", - "developer": "LimYeri", - "inference_platform": "unknown", - "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6492406813920397 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48526582322240047 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3607916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3353557180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Locutusque/Locutusque_CollectiveLM-Falcon-3-7B/44737b7e-4942-4496-a818-fddce66da4d6.json b/leaderboard_data/HFOpenLLMv2/Locutusque/Locutusque_CollectiveLM-Falcon-3-7B/44737b7e-4942-4496-a818-fddce66da4d6.json deleted file mode 100644 index 3167c7e62df2b9d8a6792587c8309509455390c4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Locutusque/Locutusque_CollectiveLM-Falcon-3-7B/44737b7e-4942-4496-a818-fddce66da4d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Locutusque_CollectiveLM-Falcon-3-7B/1762652579.734693", - "retrieved_timestamp": "1762652579.734694", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Locutusque/CollectiveLM-Falcon-3-7B", - "developer": "Locutusque", - "inference_platform": "unknown", - "id": "Locutusque/CollectiveLM-Falcon-3-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3918281271470808 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5105131374222629 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", 
- "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3887291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35987367021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Luni/Luni_StarDust-12b-v1/fa64b745-6b4b-4fee-b77e-d744e54a17d6.json b/leaderboard_data/HFOpenLLMv2/Luni/Luni_StarDust-12b-v1/fa64b745-6b4b-4fee-b77e-d744e54a17d6.json deleted file mode 100644 index 2b59cf1b7dd63f464e2ccaa6ef00448597ac8ff3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Luni/Luni_StarDust-12b-v1/fa64b745-6b4b-4fee-b77e-d744e54a17d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Luni_StarDust-12b-v1/1762652579.736537", - "retrieved_timestamp": "1762652579.7365382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Luni/StarDust-12b-v1", - "developer": "Luni", - "inference_platform": "unknown", - "id": "Luni/StarDust-12b-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5459259210007226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366139363101082 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43244791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.34117353723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Luni/Luni_StarDust-12b-v2/401f6afc-9a2a-4bfe-87b2-daa6df848424.json b/leaderboard_data/HFOpenLLMv2/Luni/Luni_StarDust-12b-v2/401f6afc-9a2a-4bfe-87b2-daa6df848424.json deleted file mode 100644 index 564236d26296c752e791ca52fc0078fbbe9d0b6a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Luni/Luni_StarDust-12b-v2/401f6afc-9a2a-4bfe-87b2-daa6df848424.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Luni_StarDust-12b-v2/1762652579.736784", - "retrieved_timestamp": "1762652579.736785", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Luni/StarDust-12b-v2", - "developer": "Luni", - "inference_platform": "unknown", - "id": "Luni/StarDust-12b-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5628620947973599 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5419479534912178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4338125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3439162234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lyte/Lyte_Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/8fdc62c0-215c-4502-8f56-188455fe2d9e.json b/leaderboard_data/HFOpenLLMv2/Lyte/Lyte_Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/8fdc62c0-215c-4502-8f56-188455fe2d9e.json deleted file mode 100644 index 7b5062a2fdaf6082fe45f9ca4d3d78919f21d905..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/Lyte/Lyte_Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/8fdc62c0-215c-4502-8f56-188455fe2d9e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lyte_Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/1762652579.74142", - "retrieved_timestamp": "1762652579.74142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3", - "developer": "Lyte", - "inference_platform": "unknown", - "id": "Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7098155117310957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4949521619329585 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1903323262839879 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.346125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36178523936170215 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Lyte/Lyte_Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/ea928079-f00f-41b1-a628-c1539b41e63d.json b/leaderboard_data/HFOpenLLMv2/Lyte/Lyte_Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/ea928079-f00f-41b1-a628-c1539b41e63d.json deleted file mode 100644 index 6e299d24d1173edf21b96a4ff94c011b91e12b28..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Lyte/Lyte_Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/ea928079-f00f-41b1-a628-c1539b41e63d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lyte_Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/1762652579.7416818", - "retrieved_timestamp": "1762652579.741683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04", - "developer": "Lyte", - "inference_platform": "unknown", - "id": "Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5773503193748144 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3515036874279285 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32355208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18425864361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MEscriva/MEscriva_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/ab59c1cb-ac90-4fe1-b782-2e038734366e.json b/leaderboard_data/HFOpenLLMv2/MEscriva/MEscriva_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/ab59c1cb-ac90-4fe1-b782-2e038734366e.json deleted file mode 100644 index 40c11e18c05d6a32592a1986e2a1983676c9fe7f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MEscriva/MEscriva_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/ab59c1cb-ac90-4fe1-b782-2e038734366e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MEscriva_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/1762652579.7424488", - "retrieved_timestamp": "1762652579.7424488", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", - "developer": "MEscriva", - "inference_platform": "unknown", - "id": "MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08662903318749807 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.305728612437881 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40171874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11544215425531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MTSAIR/MTSAIR_Cotype-Nano/b5fa19ff-9b05-4d71-9d79-54f8dfe4a8ab.json b/leaderboard_data/HFOpenLLMv2/MTSAIR/MTSAIR_Cotype-Nano/b5fa19ff-9b05-4d71-9d79-54f8dfe4a8ab.json deleted file mode 100644 index bc85947ba96ba014f9b496eecbf2e3950e2437ad..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MTSAIR/MTSAIR_Cotype-Nano/b5fa19ff-9b05-4d71-9d79-54f8dfe4a8ab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MTSAIR_Cotype-Nano/1762652579.742943", - "retrieved_timestamp": "1762652579.742944", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MTSAIR/Cotype-Nano", - "developer": "MTSAIR", - "inference_platform": "unknown", - "id": "MTSAIR/Cotype-Nano" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3747922179816221 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3864940969601492 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24767287234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MTSAIR/MTSAIR_MultiVerse_70B/a713dba7-110a-40a0-9d89-d48567d423af.json b/leaderboard_data/HFOpenLLMv2/MTSAIR/MTSAIR_MultiVerse_70B/a713dba7-110a-40a0-9d89-d48567d423af.json deleted file mode 100644 index 2b9060f16e5b03779bb7d01087afb8a055e98355..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MTSAIR/MTSAIR_MultiVerse_70B/a713dba7-110a-40a0-9d89-d48567d423af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MTSAIR_MultiVerse_70B/1762652579.743202", - "retrieved_timestamp": "1762652579.7432032", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MTSAIR/MultiVerse_70B", - "developer": "MTSAIR", - "inference_platform": "unknown", - "id": "MTSAIR/MultiVerse_70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5249183278146429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6183134284931178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47398958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48603723404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 72.289 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.1/f3024d7f-f25f-4220-973a-b0e19ecb5e1d.json b/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.1/f3024d7f-f25f-4220-973a-b0e19ecb5e1d.json deleted file mode 100644 index 65c689416cec4a0fda147a56f0920141366ea969..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.1/f3024d7f-f25f-4220-973a-b0e19ecb5e1d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.1/1762652579.743415", - "retrieved_timestamp": "1762652579.743416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1", - "developer": "Magpie-Align", - "inference_platform": "unknown", - "id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4361416596851908 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4615102744527366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32773958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2863198138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.3/4756be0b-fd98-467f-a256-73aabba09c97.json 
b/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.3/4756be0b-fd98-467f-a256-73aabba09c97.json deleted file mode 100644 index b42aec033b4a4891eb10395a9e396e1bd20bcc04..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.3/4756be0b-fd98-467f-a256-73aabba09c97.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.3/1762652579.743664", - "retrieved_timestamp": "1762652579.743665", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3", - "developer": "Magpie-Align", - "inference_platform": "unknown", - "id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5063586838477463 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45715808996720547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34237500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902260638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3.1-8B-Magpie-Align-SFT-v0.1/43d2e788-e186-485d-8c34-10bdfd7a6b65.json b/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3.1-8B-Magpie-Align-SFT-v0.1/43d2e788-e186-485d-8c34-10bdfd7a6b65.json deleted file mode 100644 index b691d52b934afdd5ef7f2dfba4005c35864b878e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3.1-8B-Magpie-Align-SFT-v0.1/43d2e788-e186-485d-8c34-10bdfd7a6b65.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Magpie-Align_Llama-3.1-8B-Magpie-Align-SFT-v0.1/1762652579.744527", - "retrieved_timestamp": "1762652579.744527", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1", - "developer": "Magpie-Align", - "inference_platform": "unknown", - "id": "Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47820671374176077 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4764157817799906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3397395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29429853723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_MagpieLM-8B-Chat-v0.1/b14fcc84-7caf-4aa8-b728-8a1287a5c04a.json b/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_MagpieLM-8B-Chat-v0.1/b14fcc84-7caf-4aa8-b728-8a1287a5c04a.json deleted file mode 100644 index 9d2d80a8abaad71725a6619ba655b2ea75ef09f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_MagpieLM-8B-Chat-v0.1/b14fcc84-7caf-4aa8-b728-8a1287a5c04a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Magpie-Align_MagpieLM-8B-Chat-v0.1/1762652579.744951", - "retrieved_timestamp": "1762652579.744951", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"Magpie-Align/MagpieLM-8B-Chat-v0.1", - "developer": "Magpie-Align", - "inference_platform": "unknown", - "id": "Magpie-Align/MagpieLM-8B-Chat-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3700714105240761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4172338260055306 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3500625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3194813829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_MagpieLM-8B-SFT-v0.1/eb307f58-db7e-44b3-bf03-7264a39bed69.json b/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_MagpieLM-8B-SFT-v0.1/eb307f58-db7e-44b3-bf03-7264a39bed69.json deleted file mode 100644 index f831d8c477c6a2aecc09af0ac27b27c7da1aeabc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_MagpieLM-8B-SFT-v0.1/eb307f58-db7e-44b3-bf03-7264a39bed69.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Magpie-Align_MagpieLM-8B-SFT-v0.1/1762652579.7451751", - "retrieved_timestamp": "1762652579.7451751", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Magpie-Align/MagpieLM-8B-SFT-v0.1", - "developer": "Magpie-Align", - "inference_platform": "unknown", - "id": "Magpie-Align/MagpieLM-8B-SFT-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4720619068515982 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45528501595553356 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2989527925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_ContentCuisine_1-7B-slerp/74d2724e-9d5d-4142-9cff-3fd40c931882.json b/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_ContentCuisine_1-7B-slerp/74d2724e-9d5d-4142-9cff-3fd40c931882.json deleted file mode 100644 index ef26313d3830f93a8dd4af921d71dedc4dc00d07..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_ContentCuisine_1-7B-slerp/74d2724e-9d5d-4142-9cff-3fd40c931882.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ManoloPueblo_ContentCuisine_1-7B-slerp/1762652579.745631", - "retrieved_timestamp": "1762652579.745632", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ManoloPueblo/ContentCuisine_1-7B-slerp", - "developer": "ManoloPueblo", - "inference_platform": "unknown", - "id": "ManoloPueblo/ContentCuisine_1-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3907044419916932 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5188437309746964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46719791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30535239361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_LLM_MERGE_CC2/f7ca7fb6-b02c-4c27-afef-662bb62cd054.json b/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_LLM_MERGE_CC2/f7ca7fb6-b02c-4c27-afef-662bb62cd054.json deleted file mode 100644 index 6be3c15dd2a94432fa3b899bccf0f6ce3a2a9a84..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_LLM_MERGE_CC2/f7ca7fb6-b02c-4c27-afef-662bb62cd054.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ManoloPueblo_LLM_MERGE_CC2/1762652579.745891", - "retrieved_timestamp": "1762652579.745892", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ManoloPueblo/LLM_MERGE_CC2", - "developer": "ManoloPueblo", - "inference_platform": "unknown", - "id": "ManoloPueblo/LLM_MERGE_CC2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3853087585384557 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5209367401710429 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45929166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.30319148936170215 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_LLM_MERGE_CC3/1c3dfe6a-28e7-4125-a802-1898336b1beb.json b/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_LLM_MERGE_CC3/1c3dfe6a-28e7-4125-a802-1898336b1beb.json deleted file mode 100644 index 992b2cf6f355bbe67f00c3ec23794f0e7b5294fe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_LLM_MERGE_CC3/1c3dfe6a-28e7-4125-a802-1898336b1beb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ManoloPueblo_LLM_MERGE_CC3/1762652579.7460978", - "retrieved_timestamp": "1762652579.746099", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ManoloPueblo/LLM_MERGE_CC3", - "developer": "ManoloPueblo", - "inference_platform": "unknown", - "id": "ManoloPueblo/LLM_MERGE_CC3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3958751667797001 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5246290546274339 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4671666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3155751329787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MarinaraSpaghetti/MarinaraSpaghetti_NemoReRemix-12B/ac67a9d9-0f5a-4891-a9e5-2a924fbf4f72.json b/leaderboard_data/HFOpenLLMv2/MarinaraSpaghetti/MarinaraSpaghetti_NemoReRemix-12B/ac67a9d9-0f5a-4891-a9e5-2a924fbf4f72.json deleted file mode 100644 index e2e4d79ed00372b7f80aba761fefe86673edf678..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/MarinaraSpaghetti/MarinaraSpaghetti_NemoReRemix-12B/ac67a9d9-0f5a-4891-a9e5-2a924fbf4f72.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MarinaraSpaghetti_NemoReRemix-12B/1762652579.7463942", - "retrieved_timestamp": "1762652579.746399", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MarinaraSpaghetti/NemoReRemix-12B", - "developer": "MarinaraSpaghetti", - "inference_platform": "unknown", - "id": "MarinaraSpaghetti/NemoReRemix-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33425089872649016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5536511805668158 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4501458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3597905585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MarinaraSpaghetti/MarinaraSpaghetti_Nemomix-v4.0-12B/aeac3ed0-e93b-4fb2-bdd5-1fd06ccd3338.json b/leaderboard_data/HFOpenLLMv2/MarinaraSpaghetti/MarinaraSpaghetti_Nemomix-v4.0-12B/aeac3ed0-e93b-4fb2-bdd5-1fd06ccd3338.json deleted file mode 100644 index e8a9387e0a3dababb43372b7112ba3a959058f5c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MarinaraSpaghetti/MarinaraSpaghetti_Nemomix-v4.0-12B/aeac3ed0-e93b-4fb2-bdd5-1fd06ccd3338.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MarinaraSpaghetti_Nemomix-v4.0-12B/1762652579.746819", - "retrieved_timestamp": "1762652579.7468212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MarinaraSpaghetti/Nemomix-v4.0-12B", - "developer": "MarinaraSpaghetti", - "inference_platform": "unknown", - "id": "MarinaraSpaghetti/Nemomix-v4.0-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5574664113441224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5274986611124783 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42444791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36128656914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/2c99d2a7-7a5f-4357-ad92-745d8a718ee3.json b/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/2c99d2a7-7a5f-4357-ad92-745d8a718ee3.json deleted file mode 100644 index 6838cf4e514f27f7c798aa3d401e7d599a9b87b4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/2c99d2a7-7a5f-4357-ad92-745d8a718ee3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Marsouuu_MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/1762652579.747071", - "retrieved_timestamp": "1762652579.747073", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial", - "developer": "Marsouuu", - "inference_platform": "unknown", - "id": "Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25484159807089635 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3952730330493959 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40832291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22739361702127658 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_general3B-ECE-PRYMMAL-Martial/6f36320a-dcfd-4e93-87b2-53763dde5c57.json b/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_general3B-ECE-PRYMMAL-Martial/6f36320a-dcfd-4e93-87b2-53763dde5c57.json deleted file mode 100644 index af099c17c4133387a688e86d398546b6026cb99f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_general3B-ECE-PRYMMAL-Martial/6f36320a-dcfd-4e93-87b2-53763dde5c57.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Marsouuu_general3B-ECE-PRYMMAL-Martial/1762652579.748109", - "retrieved_timestamp": "1762652579.74811", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Marsouuu/general3B-ECE-PRYMMAL-Martial", - "developer": "Marsouuu", - "inference_platform": "unknown", - "id": "Marsouuu/general3B-ECE-PRYMMAL-Martial" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27222658102722996 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5394350977017502 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4700520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38763297872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_general3Bv2-ECE-PRYMMAL-Martial/716552b2-6343-4339-b9f5-a573fa47c384.json b/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_general3Bv2-ECE-PRYMMAL-Martial/716552b2-6343-4339-b9f5-a573fa47c384.json deleted file mode 100644 index 95475411b4e3d91799e86b89991a8eeaf09958a4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_general3Bv2-ECE-PRYMMAL-Martial/716552b2-6343-4339-b9f5-a573fa47c384.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Marsouuu_general3Bv2-ECE-PRYMMAL-Martial/1762652579.748472", - "retrieved_timestamp": "1762652579.7484732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Marsouuu/general3Bv2-ECE-PRYMMAL-Martial", - "developer": "Marsouuu", - "inference_platform": "unknown", - "id": "Marsouuu/general3Bv2-ECE-PRYMMAL-Martial" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5692817280371636 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5636569831901026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43960416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4498005319148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg1_78B-ECE-PRYMMAL-Martial/49532386-7e9b-4719-9c24-5d463dea6cfc.json b/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg1_78B-ECE-PRYMMAL-Martial/49532386-7e9b-4719-9c24-5d463dea6cfc.json deleted file mode 100644 index 3891ec167dc1fe7815f04025365c8180b1a7a099..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg1_78B-ECE-PRYMMAL-Martial/49532386-7e9b-4719-9c24-5d463dea6cfc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Marsouuu_lareneg1_78B-ECE-PRYMMAL-Martial/1762652579.7487411", - "retrieved_timestamp": "1762652579.7487419", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial", - "developer": "Marsouuu", - "inference_platform": "unknown", - "id": "Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2794961812435449 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42301343044108936 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38673958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2922207446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end 
of file diff --git a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg3B-ECE-PRYMMAL-Martial/8d0e995d-2859-461b-8be7-60d2b2690d6b.json b/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg3B-ECE-PRYMMAL-Martial/8d0e995d-2859-461b-8be7-60d2b2690d6b.json deleted file mode 100644 index e6da8c755742528289d36656912be0d8687a6ac6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg3B-ECE-PRYMMAL-Martial/8d0e995d-2859-461b-8be7-60d2b2690d6b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Marsouuu_lareneg3B-ECE-PRYMMAL-Martial/1762652579.748992", - "retrieved_timestamp": "1762652579.748993", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Marsouuu/lareneg3B-ECE-PRYMMAL-Martial", - "developer": "Marsouuu", - "inference_platform": "unknown", - "id": "Marsouuu/lareneg3B-ECE-PRYMMAL-Martial" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33032908239028 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5453325807578268 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47246875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37666223404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg3Bv2-ECE-PRYMMAL-Martial/09b5771f-9ee2-4f4f-9fa9-e0280c33b00f.json b/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg3Bv2-ECE-PRYMMAL-Martial/09b5771f-9ee2-4f4f-9fa9-e0280c33b00f.json deleted file mode 100644 index d702a851f79413887d3c8315da90da70e489d6f7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg3Bv2-ECE-PRYMMAL-Martial/09b5771f-9ee2-4f4f-9fa9-e0280c33b00f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/Marsouuu_lareneg3Bv2-ECE-PRYMMAL-Martial/1762652579.749232", - "retrieved_timestamp": "1762652579.749232", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial", - "developer": "Marsouuu", - "inference_platform": "unknown", - "id": "Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5753267995585047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.562336014537904 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36555891238670696 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4369375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45113031914893614 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Calme-4x7B-MoE-v0.1/f4512664-c531-4b13-b76e-e96c2b03febf.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Calme-4x7B-MoE-v0.1/f4512664-c531-4b13-b76e-e96c2b03febf.json deleted file mode 100644 index a5fdd8645ca9cae2615262306aed7d606b334ade..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Calme-4x7B-MoE-v0.1/f4512664-c531-4b13-b76e-e96c2b03febf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Calme-4x7B-MoE-v0.1/1762652579.7495291", - "retrieved_timestamp": "1762652579.74953", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/Calme-4x7B-MoE-v0.1", - 
"developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Calme-4x7B-MoE-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4315205875964663 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5102819889174134 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4198854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3056848404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Calme-4x7B-MoE-v0.2/ca2df1c9-79b2-453b-9cd1-b607e48f5dd7.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Calme-4x7B-MoE-v0.2/ca2df1c9-79b2-453b-9cd1-b607e48f5dd7.json deleted file mode 100644 index ffcbc703f75147c878e8c46545e1b84ddd92660c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Calme-4x7B-MoE-v0.2/ca2df1c9-79b2-453b-9cd1-b607e48f5dd7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Calme-4x7B-MoE-v0.2/1762652579.7498329", - "retrieved_timestamp": "1762652579.749834", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/Calme-4x7B-MoE-v0.2", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Calme-4x7B-MoE-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.429447200095746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110766802558263 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43176041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30576795212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-70B-Instruct-v0.1/1e2759fa-3e87-447b-b0ca-5a4e2e293589.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-70B-Instruct-v0.1/1e2759fa-3e87-447b-b0ca-5a4e2e293589.json deleted file mode 100644 index a88c159124925999bb1fe41f5456871cd14fb426..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-70B-Instruct-v0.1/1e2759fa-3e87-447b-b0ca-5a4e2e293589.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-70B-Instruct-v0.1/1762652579.750048", - "retrieved_timestamp": "1762652579.750049", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/Llama-3-70B-Instruct-v0.1", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Llama-3-70B-Instruct-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47143800671108216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366257615951637 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4433020833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4617686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.10/19143059-07d5-44b2-b599-193147f6196a.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.10/19143059-07d5-44b2-b599-193147f6196a.json deleted file mode 100644 index 8534dc3444d99b784231fd0a14bd10a55be408cb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.10/19143059-07d5-44b2-b599-193147f6196a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-8B-Instruct-v0.10/1762652579.750272", - "retrieved_timestamp": "1762652579.750272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/Llama-3-8B-Instruct-v0.10", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.10" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7667433520835827 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4924311866686311 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42143749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38622007978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.8/c68859dd-6db0-4bdc-a031-92ac7d1d2585.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.8/c68859dd-6db0-4bdc-a031-92ac7d1d2585.json deleted file mode 100644 index 4097bb53d9764b4b518eac80a79a5e8d909f3bbe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.8/c68859dd-6db0-4bdc-a031-92ac7d1d2585.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-8B-Instruct-v0.8/1762652579.750486", - "retrieved_timestamp": "1762652579.750487", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/Llama-3-8B-Instruct-v0.8", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527549125209998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49627836815949883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42019791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3853058510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.9/1fb0056b-4f66-404b-89ac-a58185747ce2.json 
b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.9/1fb0056b-4f66-404b-89ac-a58185747ce2.json deleted file mode 100644 index 1e9bcef47ef8487928166ff9f584b37eff34814e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.9/1fb0056b-4f66-404b-89ac-a58185747ce2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-8B-Instruct-v0.9/1762652579.750697", - "retrieved_timestamp": "1762652579.750697", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/Llama-3-8B-Instruct-v0.9", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.9" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.763046494412603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4936132794870085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4148020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3845578457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Qwen2-7B-Instruct-v0.1/ce4ee4fe-8a38-467b-b189-b25311c23c4e.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Qwen2-7B-Instruct-v0.1/ce4ee4fe-8a38-467b-b189-b25311c23c4e.json deleted file mode 100644 index c7d25bbd66e8aec907cad2ae270a403da34a075c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Qwen2-7B-Instruct-v0.1/ce4ee4fe-8a38-467b-b189-b25311c23c4e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Qwen2-7B-Instruct-v0.1/1762652579.7511811", - 
"retrieved_timestamp": "1762652579.751182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/Qwen2-7B-Instruct-v0.1", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Qwen2-7B-Instruct-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33522498082864577 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5123061019250074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44347916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3857214095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Qwen2-7B-Instruct-v0.8/a65af628-f518-4da7-afc5-7cba4234415b.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Qwen2-7B-Instruct-v0.8/a65af628-f518-4da7-afc5-7cba4234415b.json deleted file mode 100644 index 81e179870d3b94925e992488b74a60aeb204f973..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Qwen2-7B-Instruct-v0.8/a65af628-f518-4da7-afc5-7cba4234415b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Qwen2-7B-Instruct-v0.8/1762652579.751401", - "retrieved_timestamp": "1762652579.751402", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/Qwen2-7B-Instruct-v0.8", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": 
"MaziyarPanahi/Qwen2-7B-Instruct-v0.8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27747266142723526 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4637108491317945 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4293125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3566323138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.1-rys-78b/387000a4-7ef5-46c6-9b5e-9bfe7c2cfc18.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.1-rys-78b/387000a4-7ef5-46c6-9b5e-9bfe7c2cfc18.json deleted file mode 100644 index be46ff654f0292e9853001628eaee59b6d8ac440..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.1-rys-78b/387000a4-7ef5-46c6-9b5e-9bfe7c2cfc18.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-rys-78b/1762652579.752971", - "retrieved_timestamp": "1762652579.752971", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.1-rys-78b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.1-rys-78b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8135547015252862 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7097861139530462 - 
} - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3942598187311178 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4693125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5443816489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.2-rys-78b/cfaafe4c-50a1-4cde-b092-fdbaeea86fb3.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.2-rys-78b/cfaafe4c-50a1-4cde-b092-fdbaeea86fb3.json deleted file mode 100644 index 1c8b2e3e8f7a448db3b3da576167b85527ec01d7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.2-rys-78b/cfaafe4c-50a1-4cde-b092-fdbaeea86fb3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-rys-78b/1762652579.754511", - "retrieved_timestamp": "1762652579.754511", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.2-rys-78b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.2-rys-78b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7986420475449585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7081014602379213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070996978851964 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40687919463087246 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45356250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.538563829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.3-rys-78b/33a06134-e58d-4bc7-8421-c5ae2f0dcd1f.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.3-rys-78b/33a06134-e58d-4bc7-8421-c5ae2f0dcd1f.json deleted file mode 100644 index 228b775cde56a3c079434a73d3e938e95fab6fc4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.3-rys-78b/33a06134-e58d-4bc7-8421-c5ae2f0dcd1f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-rys-78b/1762652579.7562392", - "retrieved_timestamp": "1762652579.7562408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.3-rys-78b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.3-rys-78b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8065854155862002 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7107763314317289 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40436241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45492708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5475398936170213 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"Qwen2ForCausalLM", - "params_billions": 77.965 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.4-rys-78b/48433dc8-40ff-4e36-8c6a-ced33bc22e4f.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.4-rys-78b/48433dc8-40ff-4e36-8c6a-ced33bc22e4f.json deleted file mode 100644 index 588ca075cc9aa74bce22641fc23cc9b67797189c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.4-rys-78b/48433dc8-40ff-4e36-8c6a-ced33bc22e4f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.4-rys-78b/1762652579.7570088", - "retrieved_timestamp": "1762652579.75701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.4-rys-78b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.4-rys-78b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8010899967641414 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7279510956242796 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070996978851964 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40268456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5770624999999999 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7002160904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-baguette-3b/8f0a6518-d153-43ec-b426-02136a2bc367.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-baguette-3b/8f0a6518-d153-43ec-b426-02136a2bc367.json deleted file mode 100644 index 18bba3292191c4525da433d03f27d41eb7d983a1..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-baguette-3b/8f0a6518-d153-43ec-b426-02136a2bc367.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-baguette-3b/1762652579.7580318", - "retrieved_timestamp": "1762652579.7580328", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.1-baguette-3b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.1-baguette-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6234369251364158 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46833341042911075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25604229607250756 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40079166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33992686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-instruct-3b/67915bce-0b54-4996-90f6-cec6def9bbba.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-instruct-3b/67915bce-0b54-4996-90f6-cec6def9bbba.json deleted file mode 100644 index f97759091b428982e074dfd1413c4cf15cf24ab8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-instruct-3b/67915bce-0b54-4996-90f6-cec6def9bbba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-instruct-3b/1762652579.758249", - "retrieved_timestamp": "1762652579.75825", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" 
- }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.1-instruct-3b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.1-instruct-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43359397509718656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4812730148043098 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39520833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.355718085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-instruct-78b/898e5e91-c4c0-4494-baad-37c2bfd1931b.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-instruct-78b/898e5e91-c4c0-4494-baad-37c2bfd1931b.json deleted file mode 100644 index b70a6851d2921f4ceae94dd07318390d908b63f0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-instruct-78b/898e5e91-c4c0-4494-baad-37c2bfd1931b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-instruct-78b/1762652579.7584739", - "retrieved_timestamp": "1762652579.758475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.1-instruct-78b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.1-instruct-78b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.8135547015252862 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7305154498840408 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39274924471299094 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3959731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5890624999999999 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.718500664893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-baguette-3b/e49441f3-99a5-4cdb-bff1-79cc21711bab.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-baguette-3b/e49441f3-99a5-4cdb-bff1-79cc21711bab.json deleted file mode 100644 index 4ef2faf8e803e1b74343d8ce3573f6cd861913d3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-baguette-3b/e49441f3-99a5-4cdb-bff1-79cc21711bab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.2-baguette-3b/1762652579.75889", - "retrieved_timestamp": "1762652579.758891", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.2-baguette-3b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.2-baguette-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6338282423968404 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.470862269902714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.2824773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40209374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3337765957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-instruct-3b/83e46bac-5266-4f65-a4dd-76240b297adc.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-instruct-3b/83e46bac-5266-4f65-a4dd-76240b297adc.json deleted file mode 100644 index 854147fe0623a41ff372b37a23df2299985d9b84..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-instruct-3b/83e46bac-5266-4f65-a4dd-76240b297adc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.2-instruct-3b/1762652579.759095", - "retrieved_timestamp": "1762652579.7590961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.2-instruct-3b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.2-instruct-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5533196363426819 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4865641110376735 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21676737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.40469791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36527593085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-instruct-78b/77cc280c-b794-4a9a-addc-e2eb0a1af896.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-instruct-78b/77cc280c-b794-4a9a-addc-e2eb0a1af896.json deleted file mode 100644 index 7e7dc4d5a1a3fc3603f23c31b059e5c8f29e86fe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-instruct-78b/77cc280c-b794-4a9a-addc-e2eb0a1af896.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.2-instruct-78b/1762652579.759298", - "retrieved_timestamp": "1762652579.759299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.2-instruct-78b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.2-instruct-78b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8062607215521482 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7318616272092674 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4033232628398791 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40268456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6023645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7303025265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.3-baguette-3b/22cbbb6d-1014-42af-96cf-1636fcb40679.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.3-baguette-3b/22cbbb6d-1014-42af-96cf-1636fcb40679.json deleted file mode 100644 index 7c43e9c6cdcc565cd67c578f86c8fdebf1d4feb4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.3-baguette-3b/22cbbb6d-1014-42af-96cf-1636fcb40679.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.3-baguette-3b/1762652579.759511", - "retrieved_timestamp": "1762652579.759511", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.3-baguette-3b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.3-baguette-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6359514975819713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4678217295957521 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806646525679758 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39282291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341921542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.3-instruct-3b/8aa85bd2-eab2-491b-95a3-ac6321cbe298.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.3-instruct-3b/8aa85bd2-eab2-491b-95a3-ac6321cbe298.json deleted file mode 100644 index 355fd2dd9bcd51301f5d3b03a8eaaaf4f6b9a81c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.3-instruct-3b/8aa85bd2-eab2-491b-95a3-ac6321cbe298.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.3-instruct-3b/1762652579.759784", - "retrieved_timestamp": "1762652579.759785", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.3-instruct-3b", - "developer": "MaziyarPanahi", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.3-instruct-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6423212631373645 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46933409427688694 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37386706948640486 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40742708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053523936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-7B-00/ba9ead4a-3d47-4a51-bc39-dbf72d7ff3af.json b/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-7B-00/ba9ead4a-3d47-4a51-bc39-dbf72d7ff3af.json deleted file mode 100644 index 503a40553fd713520856be459002dd3bd82cfff1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-7B-00/ba9ead4a-3d47-4a51-bc39-dbf72d7ff3af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Minami-su_test-7B-00/1762652579.7606468", - "retrieved_timestamp": "1762652579.76065", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Minami-su/test-7B-00", - "developer": "Minami-su", - "inference_platform": "unknown", - "id": 
"Minami-su/test-7B-00" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6690492338107332 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44661237656101793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41260416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3587932180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-7B-01/2918f03e-3fd5-4183-be8d-2911e0204e8d.json b/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-7B-01/2918f03e-3fd5-4183-be8d-2911e0204e8d.json deleted file mode 100644 index 02f3dec17a62892bd8a949c63e048155e623fbbd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-7B-01/2918f03e-3fd5-4183-be8d-2911e0204e8d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Minami-su_test-7B-01/1762652579.761029", - "retrieved_timestamp": "1762652579.76103", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Minami-su/test-7B-01", - "developer": "Minami-su", - "inference_platform": "unknown", - "id": "Minami-su/test-7B-01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6736204382150472 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4422359420239754 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4554380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41530208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35355718085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-v2-7B-00/95abd2ea-1fb7-4ef8-b186-bfe67148e486.json b/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-v2-7B-00/95abd2ea-1fb7-4ef8-b186-bfe67148e486.json deleted file mode 100644 index 0d80d9c60a7dac62431482e3ee95bb4e76397f38..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-v2-7B-00/95abd2ea-1fb7-4ef8-b186-bfe67148e486.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Minami-su_test-v2-7B-00/1762652579.76127", - "retrieved_timestamp": "1762652579.761271", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Minami-su/test-v2-7B-00", - "developer": "Minami-su", - "inference_platform": "unknown", - "id": "Minami-su/test-v2-7B-00" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6747197436136119 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4415989344595353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4418429003021148 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41542708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3472406914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ModelCloud/ModelCloud_Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/4a68c55f-ac3d-4173-a1cc-8bb97a2b8466.json b/leaderboard_data/HFOpenLLMv2/ModelCloud/ModelCloud_Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/4a68c55f-ac3d-4173-a1cc-8bb97a2b8466.json deleted file mode 100644 index 1880985b97db3ccbe230892afc0e57ac1ca8a8ae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ModelCloud/ModelCloud_Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/4a68c55f-ac3d-4173-a1cc-8bb97a2b8466.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ModelCloud_Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/1762652579.761516", - "retrieved_timestamp": "1762652579.761517", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1", - "developer": "ModelCloud", - "inference_platform": "unknown", - "id": "ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5268919799465418 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3252726665015006 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3249166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17644614361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 5.453 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Mostafa8Mehrabi/Mostafa8Mehrabi_llama-3.2-1b-Insomnia-ChatBot-merged/940d1360-047b-4c12-a7e5-cd002675c69c.json b/leaderboard_data/HFOpenLLMv2/Mostafa8Mehrabi/Mostafa8Mehrabi_llama-3.2-1b-Insomnia-ChatBot-merged/940d1360-047b-4c12-a7e5-cd002675c69c.json deleted file mode 100644 index 53b1812768f4b8bf43cfbb50f778257401e1cdff..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Mostafa8Mehrabi/Mostafa8Mehrabi_llama-3.2-1b-Insomnia-ChatBot-merged/940d1360-047b-4c12-a7e5-cd002675c69c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Mostafa8Mehrabi_llama-3.2-1b-Insomnia-ChatBot-merged/1762652579.7624152", - "retrieved_timestamp": "1762652579.7624161", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged", - "developer": "Mostafa8Mehrabi", - "inference_platform": "unknown", - "id": "Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13206735905176042 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3003508901818665 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23657718120805368 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33815625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11311502659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MrRobotoAI/MrRobotoAI_MrRoboto-ProLong-8b-v4i/7c100a09-f34e-4bd7-b201-3779ee5a769d.json b/leaderboard_data/HFOpenLLMv2/MrRobotoAI/MrRobotoAI_MrRoboto-ProLong-8b-v4i/7c100a09-f34e-4bd7-b201-3779ee5a769d.json deleted file mode 100644 index 
4471be4e35932a05f4408a0af3a292556a102880..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MrRobotoAI/MrRobotoAI_MrRoboto-ProLong-8b-v4i/7c100a09-f34e-4bd7-b201-3779ee5a769d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MrRobotoAI_MrRoboto-ProLong-8b-v4i/1762652579.762677", - "retrieved_timestamp": "1762652579.762678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MrRobotoAI/MrRoboto-ProLong-8b-v4i", - "developer": "MrRobotoAI", - "inference_platform": "unknown", - "id": "MrRobotoAI/MrRoboto-ProLong-8b-v4i" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3834603297029659 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.458548650453507 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3068484042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MrRobotoAI/MrRobotoAI_MrRoboto-ProLongBASE-pt8-unaligned-8b/4c54b609-0af6-4116-b62f-1c8a4d68f06b.json b/leaderboard_data/HFOpenLLMv2/MrRobotoAI/MrRobotoAI_MrRoboto-ProLongBASE-pt8-unaligned-8b/4c54b609-0af6-4116-b62f-1c8a4d68f06b.json deleted file mode 100644 index 387ae7f26d3c710f71dcfc10477ebbefbf9b6056..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MrRobotoAI/MrRobotoAI_MrRoboto-ProLongBASE-pt8-unaligned-8b/4c54b609-0af6-4116-b62f-1c8a4d68f06b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MrRobotoAI_MrRoboto-ProLongBASE-pt8-unaligned-8b/1762652579.762937", - "retrieved_timestamp": "1762652579.762937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b", - "developer": "MrRobotoAI", - "inference_platform": "unknown", - "id": "MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34754008253655855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4515254903058233 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42788541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2565658244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1211-3B/2cc4a013-ff0c-44b0-b2e1-66e103606e12.json b/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1211-3B/2cc4a013-ff0c-44b0-b2e1-66e103606e12.json deleted file mode 100644 index 01db929ded5e651a63eb5fe7a68ec3db8bf6c435..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1211-3B/2cc4a013-ff0c-44b0-b2e1-66e103606e12.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1211-3B/1762652579.763158", - "retrieved_timestamp": "1762652579.763159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MultivexAI/Gladiator-Mini-Exp-1211-3B", - "developer": "MultivexAI", - "inference_platform": "unknown", - "id": "MultivexAI/Gladiator-Mini-Exp-1211-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.68760887777763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44843752663028075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13746223564954682 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.326 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3151595744680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct-V2/a152be8c-a542-4a73-8164-a43e1f04c595.json b/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct-V2/a152be8c-a542-4a73-8164-a43e1f04c595.json deleted file mode 100644 index eb54c4301a0927832b0e0aa422f812d658367c73..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct-V2/a152be8c-a542-4a73-8164-a43e1f04c595.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct-V2/1762652579.763629", - "retrieved_timestamp": "1762652579.7636302", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2", - "developer": "MultivexAI", - "inference_platform": "unknown", - "id": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6215386286165153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.438883390990549 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30082291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3025265957446808 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct/ebfb99cd-9672-4c30-9540-46e4035a0d43.json b/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct/ebfb99cd-9672-4c30-9540-46e4035a0d43.json deleted file mode 100644 index 992d58c1a69e68237d6029549ce2d3641241bba6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct/ebfb99cd-9672-4c30-9540-46e4035a0d43.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct/1762652579.763424", - "retrieved_timestamp": "1762652579.763425", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct", - "developer": "MultivexAI", - "inference_platform": "unknown", - "id": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6078748830879843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4369766992416903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1351963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - 
{ - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31145833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3048537234042553 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1222-3B-Instruct/990d6877-4045-49ef-ae23-f5a6302185d6.json b/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1222-3B-Instruct/990d6877-4045-49ef-ae23-f5a6302185d6.json deleted file mode 100644 index d19d47b6e76d3354a4832eb8345c841cf94b7e5f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1222-3B-Instruct/990d6877-4045-49ef-ae23-f5a6302185d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1222-3B-Instruct/1762652579.763836", - "retrieved_timestamp": "1762652579.7638369", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct", - "developer": "MultivexAI", - "inference_platform": "unknown", - "id": "MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6163180361440976 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4373182371021645 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31276041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.30169547872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/c14766b4-5339-4c6e-87d9-fc2bb953e176.json b/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/c14766b4-5339-4c6e-87d9-fc2bb953e176.json deleted file mode 100644 index 7d4536459aa0c7626d40c2d340277868211fbabe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/c14766b4-5339-4c6e-87d9-fc2bb953e176.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MultivexAI_Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/1762652579.764051", - "retrieved_timestamp": "1762652579.764052", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF", - "developer": "MultivexAI", - "inference_platform": "unknown", - "id": "MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14398241111362298 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29077474506950557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3641979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11087101063829788 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v1.1/0f9eeb32-85fb-4778-8618-436aa4f891ad.json b/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v1.1/0f9eeb32-85fb-4778-8618-436aa4f891ad.json deleted file mode 100644 index 
c685b50501f5adcd4811ab720df9c62a6c34071c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v1.1/0f9eeb32-85fb-4778-8618-436aa4f891ad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-0.3B-Instruct-v1.1/1762652579.764531", - "retrieved_timestamp": "1762652579.764531", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Mxode/NanoLM-0.3B-Instruct-v1.1", - "developer": "Mxode", - "inference_platform": "unknown", - "id": "Mxode/NanoLM-0.3B-Instruct-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17827918810977095 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3014403673764691 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42733333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11211768617021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.315 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v1/3c08189e-294e-4682-a7e0-e73a8d498fb2.json b/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v1/3c08189e-294e-4682-a7e0-e73a8d498fb2.json deleted file mode 100644 index 5aece453085b44583af97f8c82195039330843fc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v1/3c08189e-294e-4682-a7e0-e73a8d498fb2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-0.3B-Instruct-v1/1762652579.764268", - "retrieved_timestamp": "1762652579.764269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Mxode/NanoLM-0.3B-Instruct-v1", - "developer": "Mxode", - "inference_platform": "unknown", - "id": "Mxode/NanoLM-0.3B-Instruct-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1536744726215331 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30282462164767127 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41552083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11053856382978723 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.315 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v2/43ce0bee-e8ee-417d-be0d-841d6e26b330.json b/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v2/43ce0bee-e8ee-417d-be0d-841d6e26b330.json deleted file mode 100644 index 49297ded9c9ee1e6ed2f6d4d4a49aed4cc3ecf7b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v2/43ce0bee-e8ee-417d-be0d-841d6e26b330.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-0.3B-Instruct-v2/1762652579.7647529", - "retrieved_timestamp": "1762652579.7647538", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Mxode/NanoLM-0.3B-Instruct-v2", - "developer": "Mxode", - "inference_platform": "unknown", - "id": "Mxode/NanoLM-0.3B-Instruct-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1667885654507817 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29211039456850646 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3954583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11344747340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.315 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-1B-Instruct-v1.1/2e482de2-60ca-4758-9de8-4482e42a5b7a.json b/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-1B-Instruct-v1.1/2e482de2-60ca-4758-9de8-4482e42a5b7a.json deleted file mode 100644 index 1153a7070ddbcaa7eaa0bc5b94f2311fcb55242b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-1B-Instruct-v1.1/2e482de2-60ca-4758-9de8-4482e42a5b7a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-1B-Instruct-v1.1/1762652579.764964", - "retrieved_timestamp": "1762652579.764964", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Mxode/NanoLM-1B-Instruct-v1.1", - "developer": "Mxode", - "inference_platform": "unknown", - "id": "Mxode/NanoLM-1B-Instruct-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23952889444451833 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31835012059590373 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34327083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12150930851063829 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.076 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-1B-Instruct-v2/d7d1e48d-86af-4f65-803b-30fff69c78b5.json b/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-1B-Instruct-v2/d7d1e48d-86af-4f65-803b-30fff69c78b5.json deleted file mode 100644 index f3f4936d8fda1a17604acee010a0bd8832317f9f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-1B-Instruct-v2/d7d1e48d-86af-4f65-803b-30fff69c78b5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-1B-Instruct-v2/1762652579.765177", - "retrieved_timestamp": "1762652579.7651782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Mxode/NanoLM-1B-Instruct-v2", - "developer": "Mxode", - "inference_platform": "unknown", - "id": "Mxode/NanoLM-1B-Instruct-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2629844368497808 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3123145400715591 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35520833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.12375332446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.076 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-8b-instruct-v0.3/d0ce5c14-28fa-4fde-901e-6670db6943de.json b/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-8b-instruct-v0.3/d0ce5c14-28fa-4fde-901e-6670db6943de.json deleted file mode 100644 index 004ae590c98c745fd1222fb20c7c4eac34120334..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-8b-instruct-v0.3/d0ce5c14-28fa-4fde-901e-6670db6943de.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1-8b-instruct-v0.3/1762652579.765912", - "retrieved_timestamp": "1762652579.765913", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.3", - "developer": "NAPS-ai", - "inference_platform": "unknown", - "id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5390818583580456 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4900525115527062 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1903323262839879 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37870833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33984375 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-8b-instruct-v0.4/467a9428-e85d-489d-be59-91842b389732.json b/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-8b-instruct-v0.4/467a9428-e85d-489d-be59-91842b389732.json deleted file mode 100644 index 
d1e3b9ade89634861745a66a07f61ff8028bc3ed..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-8b-instruct-v0.4/467a9428-e85d-489d-be59-91842b389732.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1-8b-instruct-v0.4/1762652579.766172", - "retrieved_timestamp": "1762652579.766173", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.4", - "developer": "NAPS-ai", - "inference_platform": "unknown", - "id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7344202272193336 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4861833360906734 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4421145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474900265957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-instruct-v0.5.0/5553fa1d-6bf9-469d-b870-590dd4965209.json b/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-instruct-v0.5.0/5553fa1d-6bf9-469d-b870-590dd4965209.json deleted file mode 100644 index 8cca74cdf60ab6744e035b12808fd36ecd98e770..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-instruct-v0.5.0/5553fa1d-6bf9-469d-b870-590dd4965209.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1-instruct-v0.5.0/1762652579.766381", - "retrieved_timestamp": "1762652579.766382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NAPS-ai/naps-llama-3_1-instruct-v0.5.0", - "developer": "NAPS-ai", - "inference_platform": "unknown", - "id": "NAPS-ai/naps-llama-3_1-instruct-v0.5.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5020124381086628 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4147584365689691 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37127083333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26138630319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NCSOFT/NCSOFT_Llama-VARCO-8B-Instruct/38876858-0585-4edb-a4af-e4c71530429c.json b/leaderboard_data/HFOpenLLMv2/NCSOFT/NCSOFT_Llama-VARCO-8B-Instruct/38876858-0585-4edb-a4af-e4c71530429c.json deleted file mode 100644 index 80f4e3594a16d3c8676bbc300466fd7c2cb6f84e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NCSOFT/NCSOFT_Llama-VARCO-8B-Instruct/38876858-0585-4edb-a4af-e4c71530429c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NCSOFT_Llama-VARCO-8B-Instruct/1762652579.767406", - "retrieved_timestamp": "1762652579.7674072", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NCSOFT/Llama-VARCO-8B-Instruct", - "developer": "NCSOFT", - "inference_platform": "unknown", - "id": "NCSOFT/Llama-VARCO-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.4470327619604871 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5022879316026018 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3840729166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31898271276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NJS26/NJS26_NJS_777/211449c7-9b14-4d20-a599-58718e9c5e4b.json b/leaderboard_data/HFOpenLLMv2/NJS26/NJS26_NJS_777/211449c7-9b14-4d20-a599-58718e9c5e4b.json deleted file mode 100644 index ce7a68af85e466e5bf3da67666bd68197f46b2a2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NJS26/NJS26_NJS_777/211449c7-9b14-4d20-a599-58718e9c5e4b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NJS26_NJS_777/1762652579.76769", - "retrieved_timestamp": "1762652579.76769", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NJS26/NJS_777", - "developer": "NJS26", - "inference_platform": "unknown", - "id": "NJS26/NJS_777" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18809647291409015 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21782097894078087 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2063758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11627327127659574 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 10.362 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_AnFeng_v3.1-Avocet/17b3cc41-69ac-48a2-9371-a5d1368dfeb9.json b/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_AnFeng_v3.1-Avocet/17b3cc41-69ac-48a2-9371-a5d1368dfeb9.json deleted file mode 100644 index b2e36791666f6d586660d7a44e01a1eaf95b1f92..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_AnFeng_v3.1-Avocet/17b3cc41-69ac-48a2-9371-a5d1368dfeb9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NLPark_AnFeng_v3.1-Avocet/1762652579.76799", - "retrieved_timestamp": "1762652579.767991", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NLPark/AnFeng_v3.1-Avocet", - "developer": "NLPark", - "inference_platform": "unknown", - "id": "NLPark/AnFeng_v3.1-Avocet" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5096311121158525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.582852329074409 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1593655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44757291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.44381648936170215 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.393 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_B-and-W_Flycatcher-3AD1E/95b94fcb-7aba-4473-b88f-36dddcd646c1.json b/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_B-and-W_Flycatcher-3AD1E/95b94fcb-7aba-4473-b88f-36dddcd646c1.json deleted file mode 100644 index 962e28d6f49071da9b65810231918009d425643e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_B-and-W_Flycatcher-3AD1E/95b94fcb-7aba-4473-b88f-36dddcd646c1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NLPark_B-and-W_Flycatcher-3AD1E/1762652579.7682638", - "retrieved_timestamp": "1762652579.768265", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NLPark/B-and-W_Flycatcher-3AD1E", - "developer": "NLPark", - "inference_platform": "unknown", - "id": "NLPark/B-and-W_Flycatcher-3AD1E" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49084650948372543 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6065117528534355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23791540785498488 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44227083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4740691489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_Shi-Ci-Robin-Test_3AD80/0fa6785d-8db5-40f9-b259-3368ffb547d4.json b/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_Shi-Ci-Robin-Test_3AD80/0fa6785d-8db5-40f9-b259-3368ffb547d4.json deleted file mode 100644 index 0863d4fe19b956434b2aeea78ebed1cabfb180d3..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_Shi-Ci-Robin-Test_3AD80/0fa6785d-8db5-40f9-b259-3368ffb547d4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NLPark_Shi-Ci-Robin-Test_3AD80/1762652579.768489", - "retrieved_timestamp": "1762652579.76849", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NLPark/Shi-Ci-Robin-Test_3AD80", - "developer": "NLPark", - "inference_platform": "unknown", - "id": "NLPark/Shi-Ci-Robin-Test_3AD80" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7226547782107031 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6704805157570325 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3598993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46959375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5120511968085106 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NTQAI/NTQAI_NxMobileLM-1.5B-SFT/7a295af9-fb47-484f-8748-af3ee245d2c5.json b/leaderboard_data/HFOpenLLMv2/NTQAI/NTQAI_NxMobileLM-1.5B-SFT/7a295af9-fb47-484f-8748-af3ee245d2c5.json deleted file mode 100644 index 421e966bde6c943deb430e7bb1a94b017f6c066a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NTQAI/NTQAI_NxMobileLM-1.5B-SFT/7a295af9-fb47-484f-8748-af3ee245d2c5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NTQAI_NxMobileLM-1.5B-SFT/1762652579.768717", - "retrieved_timestamp": "1762652579.768718", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "NTQAI/NxMobileLM-1.5B-SFT", - "developer": "NTQAI", - "inference_platform": "unknown", - "id": "NTQAI/NxMobileLM-1.5B-SFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6392239258500778 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39571778048116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35552083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28174867021276595 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NTQAI/NTQAI_Nxcode-CQ-7B-orpo/1c020e50-fe68-40c9-a36a-7bec201f409a.json b/leaderboard_data/HFOpenLLMv2/NTQAI/NTQAI_Nxcode-CQ-7B-orpo/1c020e50-fe68-40c9-a36a-7bec201f409a.json deleted file mode 100644 index 10c0b2dae074074f28448837231ea9a83ab9c504..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NTQAI/NTQAI_Nxcode-CQ-7B-orpo/1c020e50-fe68-40c9-a36a-7bec201f409a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NTQAI_Nxcode-CQ-7B-orpo/1762652579.769034", - "retrieved_timestamp": "1762652579.769035", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NTQAI/Nxcode-CQ-7B-orpo", - "developer": "NTQAI", - "inference_platform": "unknown", - "id": "NTQAI/Nxcode-CQ-7B-orpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40072119753365515 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.4143023249178217 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39396875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16115359042553193 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.25 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NYTK/NYTK_PULI-LlumiX-32K/7230c1f3-d7f6-4a96-8308-b2d5895a0a0a.json b/leaderboard_data/HFOpenLLMv2/NYTK/NYTK_PULI-LlumiX-32K/7230c1f3-d7f6-4a96-8308-b2d5895a0a0a.json deleted file mode 100644 index b2d6ee0977b03f08115b9fa5a8d789cff0ab0afe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NYTK/NYTK_PULI-LlumiX-32K/7230c1f3-d7f6-4a96-8308-b2d5895a0a0a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NYTK_PULI-LlumiX-32K/1762652579.76952", - "retrieved_timestamp": "1762652579.769521", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NYTK/PULI-LlumiX-32K", - "developer": "NYTK", - "inference_platform": "unknown", - "id": "NYTK/PULI-LlumiX-32K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1699612583500667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31893582242949375 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39641666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16805186170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NbAiLab/NbAiLab_nb-llama-3.1-8B-Instruct/b0f68843-2f49-4d2a-91ab-ad8d07791125.json b/leaderboard_data/HFOpenLLMv2/NbAiLab/NbAiLab_nb-llama-3.1-8B-Instruct/b0f68843-2f49-4d2a-91ab-ad8d07791125.json deleted file mode 100644 index 229512a3707d0f4227d5f8670bca47ae85c564e9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NbAiLab/NbAiLab_nb-llama-3.1-8B-Instruct/b0f68843-2f49-4d2a-91ab-ad8d07791125.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NbAiLab_nb-llama-3.1-8B-Instruct/1762652579.7700322", - "retrieved_timestamp": "1762652579.770033", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NbAiLab/nb-llama-3.1-8B-Instruct", - "developer": "NbAiLab", - "inference_platform": "unknown", - "id": "NbAiLab/nb-llama-3.1-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.362502604201297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466553135589526 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32076041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1196808510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 
8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NbAiLab/NbAiLab_nb-llama-3.1-8B-sft/e8313b88-13ee-4926-90f8-696b0604c7b9.json b/leaderboard_data/HFOpenLLMv2/NbAiLab/NbAiLab_nb-llama-3.1-8B-sft/e8313b88-13ee-4926-90f8-696b0604c7b9.json deleted file mode 100644 index e7f8f963c548471cd56e1af05b5bfe5d08877dfb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NbAiLab/NbAiLab_nb-llama-3.1-8B-sft/e8313b88-13ee-4926-90f8-696b0604c7b9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NbAiLab_nb-llama-3.1-8B-sft/1762652579.7703218", - "retrieved_timestamp": "1762652579.770323", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NbAiLab/nb-llama-3.1-8B-sft", - "developer": "NbAiLab", - "inference_platform": "unknown", - "id": "NbAiLab/nb-llama-3.1-8B-sft" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36157838978355206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3281509048328078 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3287291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12217420212765957 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Llama-3.1-8B-french-DPO/ebc2a3b7-30e9-4608-a8c0-ea90a308c0e5.json b/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Llama-3.1-8B-french-DPO/ebc2a3b7-30e9-4608-a8c0-ea90a308c0e5.json deleted file mode 100644 index 2ba1a11a41f3f85afa7fb8e47dc9a9c6cc3cca97..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Llama-3.1-8B-french-DPO/ebc2a3b7-30e9-4608-a8c0-ea90a308c0e5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Nekochu_Llama-3.1-8B-french-DPO/1762652579.770777", - "retrieved_timestamp": "1762652579.7707782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nekochu/Llama-3.1-8B-french-DPO", - "developer": "Nekochu", - "inference_platform": "unknown", - "id": "Nekochu/Llama-3.1-8B-french-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46564227361179444 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110888403999433 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414228723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Luminia-13B-v3/172f121a-3843-4b01-94e1-a95001909bb8.json b/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Luminia-13B-v3/172f121a-3843-4b01-94e1-a95001909bb8.json deleted file mode 100644 index c92d93d964c9db3bc0ecf18b1166605981ec48d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Luminia-13B-v3/172f121a-3843-4b01-94e1-a95001909bb8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nekochu_Luminia-13B-v3/1762652579.771023", - "retrieved_timestamp": "1762652579.771023", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nekochu/Luminia-13B-v3", - "developer": "Nekochu", - "inference_platform": "unknown", - "id": "Nekochu/Luminia-13B-v3" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25231829323971505 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41121515510929624 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3983333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22149268617021275 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Luminia-8B-RP/fd23ba4a-a0ce-474b-9aa4-b5295d872028.json b/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Luminia-8B-RP/fd23ba4a-a0ce-474b-9aa4-b5295d872028.json deleted file mode 100644 index 9a50f5bfaca2b2456b93dc45025aa4730f6452fe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Luminia-8B-RP/fd23ba4a-a0ce-474b-9aa4-b5295d872028.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nekochu_Luminia-8B-RP/1762652579.7713962", - "retrieved_timestamp": "1762652579.7713978", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nekochu/Luminia-8B-RP", - "developer": "Nekochu", - "inference_platform": "unknown", - "id": "Nekochu/Luminia-8B-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5574165436597118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5218151030627874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13595166163141995 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3997604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3631150265957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NeverSleep/NeverSleep_Lumimaid-v0.2-12B/cee1293c-54fb-4275-b5a9-0215e5f9a4c0.json b/leaderboard_data/HFOpenLLMv2/NeverSleep/NeverSleep_Lumimaid-v0.2-12B/cee1293c-54fb-4275-b5a9-0215e5f9a4c0.json deleted file mode 100644 index b9bdbd6217988ef607d8154e502b996b06fca99d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NeverSleep/NeverSleep_Lumimaid-v0.2-12B/cee1293c-54fb-4275-b5a9-0215e5f9a4c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NeverSleep_Lumimaid-v0.2-12B/1762652579.771668", - "retrieved_timestamp": "1762652579.771669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NeverSleep/Lumimaid-v0.2-12B", - "developer": "NeverSleep", - "inference_platform": "unknown", - "id": "NeverSleep/Lumimaid-v0.2-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10993497253952846 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5395610525850818 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.48211458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3511469414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NeverSleep/NeverSleep_Lumimaid-v0.2-8B/6d7f1ac9-66c8-4700-87a9-0e413fc8878e.json b/leaderboard_data/HFOpenLLMv2/NeverSleep/NeverSleep_Lumimaid-v0.2-8B/6d7f1ac9-66c8-4700-87a9-0e413fc8878e.json deleted file mode 100644 index 95f35860e0c45a0572a6503a80dec45848210388..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NeverSleep/NeverSleep_Lumimaid-v0.2-8B/6d7f1ac9-66c8-4700-87a9-0e413fc8878e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NeverSleep_Lumimaid-v0.2-8B/1762652579.771939", - "retrieved_timestamp": "1762652579.771939", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NeverSleep/Lumimaid-v0.2-8B", - "developer": "NeverSleep", - "inference_platform": "unknown", - "id": "NeverSleep/Lumimaid-v0.2-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5038109992597419 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5237767601226618 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36361369680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_Nemotron_W_4b_Halo_0.1/243b045a-8442-41fd-a483-e4e25b771048.json b/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_Nemotron_W_4b_Halo_0.1/243b045a-8442-41fd-a483-e4e25b771048.json deleted file mode 100644 index 9115ad10e55889db80f513e375e51110a277eed6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_Nemotron_W_4b_Halo_0.1/243b045a-8442-41fd-a483-e4e25b771048.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Nemotron_W_4b_Halo_0.1/1762652579.78175", - "retrieved_timestamp": "1762652579.7817512", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Nemotron_W_4b_Halo_0.1", - "developer": "Nexesenex", - "inference_platform": "unknown", - "id": "Nexesenex/Nemotron_W_4b_Halo_0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3627275628665275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4135101667655742 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28020134228187926 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41651041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25049867021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.513 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_Nemotron_W_4b_MagLight_0.1/2f3f0dcb-a62d-44bd-b86d-c1f403d5b833.json b/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_Nemotron_W_4b_MagLight_0.1/2f3f0dcb-a62d-44bd-b86d-c1f403d5b833.json deleted file mode 100644 index 1308d91299cac2baf9c814a9894f419ba523c31d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_Nemotron_W_4b_MagLight_0.1/2f3f0dcb-a62d-44bd-b86d-c1f403d5b833.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Nexesenex_Nemotron_W_4b_MagLight_0.1/1762652579.781992", - "retrieved_timestamp": "1762652579.781993", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Nemotron_W_4b_MagLight_0.1", - "developer": "Nexesenex", - "inference_platform": "unknown", - "id": "Nexesenex/Nemotron_W_4b_MagLight_0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4230275668559422 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42314083807225433 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41120833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2544880319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.513 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/318afc06-f294-4253-b1c9-173a7f56083b.json b/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/318afc06-f294-4253-b1c9-173a7f56083b.json deleted file mode 100644 index 35d7bd691f27c3b4ef4c7e1d919f815ce64052ae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/318afc06-f294-4253-b1c9-173a7f56083b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/1762652579.7826922", - "retrieved_timestamp": "1762652579.7826939", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL", - "developer": "Nexesenex", - "inference_platform": "unknown", - "id": "Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5889905450870357 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3562492190965966 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728095 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33955208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1802692819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nexusflow/Nexusflow_NexusRaven-V2-13B/f5e5662e-803e-4f1f-82e7-14a2a189ed6d.json b/leaderboard_data/HFOpenLLMv2/Nexusflow/Nexusflow_NexusRaven-V2-13B/f5e5662e-803e-4f1f-82e7-14a2a189ed6d.json deleted file mode 100644 index 156c68fee87b606ce685d918592150a884c7fa97..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nexusflow/Nexusflow_NexusRaven-V2-13B/f5e5662e-803e-4f1f-82e7-14a2a189ed6d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexusflow_NexusRaven-V2-13B/1762652579.782948", - "retrieved_timestamp": "1762652579.7829492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexusflow/NexusRaven-V2-13B", - "developer": "Nexusflow", - "inference_platform": "unknown", - "id": "Nexusflow/NexusRaven-V2-13B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1790781792311068 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39488604640507335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3736875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18716755319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_AceMath-1.5B-Instruct-1epoch/0cf3db2f-9b23-4602-ac92-265bafd36410.json b/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_AceMath-1.5B-Instruct-1epoch/0cf3db2f-9b23-4602-ac92-265bafd36410.json deleted file mode 100644 index b6f521bc642f6aa6c18dab8db4170acf086a0420..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_AceMath-1.5B-Instruct-1epoch/0cf3db2f-9b23-4602-ac92-265bafd36410.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NikolaSigmoid_AceMath-1.5B-Instruct-1epoch/1762652579.783191", - "retrieved_timestamp": "1762652579.7831922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NikolaSigmoid/AceMath-1.5B-Instruct-1epoch", - "developer": "NikolaSigmoid", - "inference_platform": "unknown", - "id": "NikolaSigmoid/AceMath-1.5B-Instruct-1epoch" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2848918646967823 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.426284784119477 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30513595166163143 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39251041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23761635638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.791 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_AceMath-1.5B-Instruct-dolphin-r1-200/93f56942-30d8-4a0f-af8d-901fb264436c.json b/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_AceMath-1.5B-Instruct-dolphin-r1-200/93f56942-30d8-4a0f-af8d-901fb264436c.json deleted file mode 100644 index b3b9181ee5c95aa3eec845ecc29903ebc2da4ac9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_AceMath-1.5B-Instruct-dolphin-r1-200/93f56942-30d8-4a0f-af8d-901fb264436c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NikolaSigmoid_AceMath-1.5B-Instruct-dolphin-r1-200/1762652579.783446", - "retrieved_timestamp": "1762652579.783447", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200", - "developer": "NikolaSigmoid", - "inference_platform": "unknown", - "id": "NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18080249294095221 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28148007801214714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.37495833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11427859042553191 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.928 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_acemath-200/4414a96e-0664-4531-9c0f-3eb4a062fbe2.json b/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_acemath-200/4414a96e-0664-4531-9c0f-3eb4a062fbe2.json deleted file mode 100644 index 06b5c1c799c33c5d3263abfa555ec96c93deb4a5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_acemath-200/4414a96e-0664-4531-9c0f-3eb4a062fbe2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NikolaSigmoid_acemath-200/1762652579.783974", - "retrieved_timestamp": "1762652579.783974", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NikolaSigmoid/acemath-200", - "developer": "NikolaSigmoid", - "inference_platform": "unknown", - "id": "NikolaSigmoid/acemath-200" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2848918646967823 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.426284784119477 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30513595166163143 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39251041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23761635638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.791 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris-BMO_Violent-GRPO-v0.420/e841483e-042b-4a2a-8dbc-9ed7529f7618.json 
b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris-BMO_Violent-GRPO-v0.420/e841483e-042b-4a2a-8dbc-9ed7529f7618.json deleted file mode 100644 index 551aeb0c7d4169794286fff269bd335c0e51ec22..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris-BMO_Violent-GRPO-v0.420/e841483e-042b-4a2a-8dbc-9ed7529f7618.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris-BMO_Violent-GRPO-v0.420/1762652579.784868", - "retrieved_timestamp": "1762652579.7848692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6312805578088361 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5078530730075063 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4228020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.359624335106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_BMO-Violent-12B/ebcd5d63-5c91-41d1-b9e2-0bafe7170000.json b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_BMO-Violent-12B/ebcd5d63-5c91-41d1-b9e2-0bafe7170000.json deleted file mode 100644 index 8f66107eb01315ebc4f550e38b4b30e6afd71cc3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_BMO-Violent-12B/ebcd5d63-5c91-41d1-b9e2-0bafe7170000.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Nitral-AI_Captain-Eris_BMO-Violent-12B/1762652579.785123", - "retrieved_timestamp": "1762652579.785124", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nitral-AI/Captain-Eris_BMO-Violent-12B", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Captain-Eris_BMO-Violent-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.615218730745533 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5104372825851065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42553124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35713098404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_Violet-GRPO-v0.420/cf030461-1234-48ce-a025-ba0f52cdf191.json b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_Violet-GRPO-v0.420/cf030461-1234-48ce-a025-ba0f52cdf191.json deleted file mode 100644 index 08f53d583a71c72a0082ee2df7ec5105a75c89b0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_Violet-GRPO-v0.420/cf030461-1234-48ce-a025-ba0f52cdf191.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris_Violet-GRPO-v0.420/1762652579.785343", - "retrieved_timestamp": "1762652579.785344", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"Nitral-AI/Captain-Eris_Violet-GRPO-v0.420", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Captain-Eris_Violet-GRPO-v0.420" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6261597007052399 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.515921407165298 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42791666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35347406914893614 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_Violet-V0.420-12B/ad87ba77-99a9-463f-aea3-1d29fc0317b0.json b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_Violet-V0.420-12B/ad87ba77-99a9-463f-aea3-1d29fc0317b0.json deleted file mode 100644 index f6777e5b9a9e8e12a06bbabcd7590a4fbd5d78dd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_Violet-V0.420-12B/ad87ba77-99a9-463f-aea3-1d29fc0317b0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris_Violet-V0.420-12B/1762652579.785556", - "retrieved_timestamp": "1762652579.785557", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nitral-AI/Captain-Eris_Violet-V0.420-12B", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Captain-Eris_Violet-V0.420-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43391866913123844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5478099417611365 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43306249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3722573138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain_BMO-12B/6fed7e5b-9692-40f7-913e-fc3b57b8c72a.json b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain_BMO-12B/6fed7e5b-9692-40f7-913e-fc3b57b8c72a.json deleted file mode 100644 index 33861e469525e801aaa68000cea020e3b49cb445..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain_BMO-12B/6fed7e5b-9692-40f7-913e-fc3b57b8c72a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain_BMO-12B/1762652579.7857668", - "retrieved_timestamp": "1762652579.7857668", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nitral-AI/Captain_BMO-12B", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Captain_BMO-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4750595087700634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5285960650424973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37480208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3568816489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Hathor_Stable-v0.2-L3-8B/2bb06e2f-9aee-4ac4-b9a6-fe537c2c9890.json b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Hathor_Stable-v0.2-L3-8B/2bb06e2f-9aee-4ac4-b9a6-fe537c2c9890.json deleted file mode 100644 index e3b1d6413fd50d2d76d467ec5936bbac8bb93413..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Hathor_Stable-v0.2-L3-8B/2bb06e2f-9aee-4ac4-b9a6-fe537c2c9890.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nitral-AI_Hathor_Stable-v0.2-L3-8B/1762652579.7859662", - "retrieved_timestamp": "1762652579.785967", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nitral-AI/Hathor_Stable-v0.2-L3-8B", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Hathor_Stable-v0.2-L3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7174840534226963 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5285819178301682 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3780625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.36959773936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Hathor_Tahsin-L3-8B-v0.85/a73461e6-a1f4-43c9-9a0f-f03c9be46276.json b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Hathor_Tahsin-L3-8B-v0.85/a73461e6-a1f4-43c9-9a0f-f03c9be46276.json deleted file mode 100644 index 74b9a7bdef40b8dfa846952c82bb5abde8bfb606..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Hathor_Tahsin-L3-8B-v0.85/a73461e6-a1f4-43c9-9a0f-f03c9be46276.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nitral-AI_Hathor_Tahsin-L3-8B-v0.85/1762652579.786179", - "retrieved_timestamp": "1762652579.78618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nitral-AI/Hathor_Tahsin-L3-8B-v0.85", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Hathor_Tahsin-L3-8B-v0.85" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7110145524984818 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5279036861109899 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10045317220543806 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37200797872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Nera_Noctis-12B/2f5caa38-56e9-4740-baca-22fb02e57150.json b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Nera_Noctis-12B/2f5caa38-56e9-4740-baca-22fb02e57150.json deleted file mode 100644 index 
a5243d0dc55c80dca2de1700f7cae7badc90893f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Nera_Noctis-12B/2f5caa38-56e9-4740-baca-22fb02e57150.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nitral-AI_Nera_Noctis-12B/1762652579.786392", - "retrieved_timestamp": "1762652579.7863932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nitral-AI/Nera_Noctis-12B", - "developer": "Nitral-AI", - "inference_platform": "unknown", - "id": "Nitral-AI/Nera_Noctis-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45617517076911485 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193675192746302 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39790624999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3468251329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nohobby/Nohobby_MS-Schisandra-22B-v0.1/9836e2c7-30df-421d-bf02-d4434f97d990.json b/leaderboard_data/HFOpenLLMv2/Nohobby/Nohobby_MS-Schisandra-22B-v0.1/9836e2c7-30df-421d-bf02-d4434f97d990.json deleted file mode 100644 index 067c99d8b732b8069f45fc49b05b17135751c014..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nohobby/Nohobby_MS-Schisandra-22B-v0.1/9836e2c7-30df-421d-bf02-d4434f97d990.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nohobby_MS-Schisandra-22B-v0.1/1762652579.786606", - "retrieved_timestamp": "1762652579.786607", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nohobby/MS-Schisandra-22B-v0.1", - "developer": "Nohobby", - "inference_platform": "unknown", - "id": "Nohobby/MS-Schisandra-22B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6331289866443259 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5789949714896523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22280966767371602 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39284375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4095744680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Nohobby/Nohobby_MS-Schisandra-22B-v0.2/9a263094-fb31-43b9-9307-6ae5f64f82c0.json b/leaderboard_data/HFOpenLLMv2/Nohobby/Nohobby_MS-Schisandra-22B-v0.2/9a263094-fb31-43b9-9307-6ae5f64f82c0.json deleted file mode 100644 index ff6333620edd12c51381c0959339e1bc717d4511..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Nohobby/Nohobby_MS-Schisandra-22B-v0.2/9a263094-fb31-43b9-9307-6ae5f64f82c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nohobby_MS-Schisandra-22B-v0.2/1762652579.78686", - "retrieved_timestamp": "1762652579.786861", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nohobby/MS-Schisandra-22B-v0.2", - "developer": "Nohobby", - "inference_platform": "unknown", - "id": "Nohobby/MS-Schisandra-22B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6382997114323329 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5841215984231857 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40747916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4136469414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Alpha/6ce53368-e6b5-45a1-a997-ca5468f27c13.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Alpha/6ce53368-e6b5-45a1-a997-ca5468f27c13.json deleted file mode 100644 index 42cae64503cc2c7808ce4d67ad06986ec6d407ac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Alpha/6ce53368-e6b5-45a1-a997-ca5468f27c13.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Norquinal_Alpha/1762652579.787071", - "retrieved_timestamp": "1762652579.787072", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Norquinal/Alpha", - "developer": "Norquinal", - "inference_platform": "unknown", - "id": "Norquinal/Alpha" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802951723648808 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3373652507108038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36308333333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30028257978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Bravo/dbdae48e-5023-453f-b15f-cf779068e030.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Bravo/dbdae48e-5023-453f-b15f-cf779068e030.json deleted file mode 100644 index a1f7c4934858c04221d4e6509b50bfc7e09dcf2b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Bravo/dbdae48e-5023-453f-b15f-cf779068e030.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Norquinal_Bravo/1762652579.787321", - "retrieved_timestamp": "1762652579.787322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Norquinal/Bravo", - "developer": "Norquinal", - "inference_platform": "unknown", - "id": "Norquinal/Bravo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3024519386339357 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3558431980261287 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38686458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.312749335106383 - } - } - ], - "additional_details": { - "precision": 
"float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Charlie/31f784e4-bded-48d8-b7a6-7936b5d21d9e.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Charlie/31f784e4-bded-48d8-b7a6-7936b5d21d9e.json deleted file mode 100644 index 92c8e05aaa62324f05f3fd6e9031f461e909e51f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Charlie/31f784e4-bded-48d8-b7a6-7936b5d21d9e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Norquinal_Charlie/1762652579.787528", - "retrieved_timestamp": "1762652579.787528", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Norquinal/Charlie", - "developer": "Norquinal", - "inference_platform": "unknown", - "id": "Norquinal/Charlie" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3060989286205047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3515288346438244 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3736875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30925864361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Delta/684a3a6e-c74d-456f-b80e-c099b8c9973c.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Delta/684a3a6e-c74d-456f-b80e-c099b8c9973c.json deleted file mode 100644 index b9f2540c526e838b32a64eae12b912e5f8cc5b4e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Delta/684a3a6e-c74d-456f-b80e-c099b8c9973c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Norquinal_Delta/1762652579.78773", - 
"retrieved_timestamp": "1762652579.787731", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Norquinal/Delta", - "developer": "Norquinal", - "inference_platform": "unknown", - "id": "Norquinal/Delta" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.253842028041153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3434783285415976 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2958776595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Echo/f2f250f7-8cb0-4076-b2f0-7cf8ee911532.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Echo/f2f250f7-8cb0-4076-b2f0-7cf8ee911532.json deleted file mode 100644 index ce746cafeb2810cb2a6471f2da82987f4cd61ecb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Echo/f2f250f7-8cb0-4076-b2f0-7cf8ee911532.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Norquinal_Echo/1762652579.787929", - "retrieved_timestamp": "1762652579.787929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Norquinal/Echo", - "developer": "Norquinal", - "inference_platform": "unknown", - "id": "Norquinal/Echo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31579099012841483 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35304654390055795 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3804479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30950797872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Foxtrot/2a4428d4-a6c9-427c-ba67-72f08b590b8e.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Foxtrot/2a4428d4-a6c9-427c-ba67-72f08b590b8e.json deleted file mode 100644 index 19ac8fdf54917d6b9446c954167255f8bcef8995..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Foxtrot/2a4428d4-a6c9-427c-ba67-72f08b590b8e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Norquinal_Foxtrot/1762652579.788121", - "retrieved_timestamp": "1762652579.788121", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Norquinal/Foxtrot", - "developer": "Norquinal", - "inference_platform": "unknown", - "id": "Norquinal/Foxtrot" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011531624977283 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3558026577191667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3804166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30501994680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Golf/dfdcfbfa-c023-40bf-b5e3-632b45f28aab.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Golf/dfdcfbfa-c023-40bf-b5e3-632b45f28aab.json deleted file mode 100644 index 3bd2bad02d33f25160fb68cde5b4b403cc93eee2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Golf/dfdcfbfa-c023-40bf-b5e3-632b45f28aab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Norquinal_Golf/1762652579.788314", - "retrieved_timestamp": "1762652579.7883148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Norquinal/Golf", - "developer": "Norquinal", - "inference_platform": "unknown", - "id": "Norquinal/Golf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3533601953926692 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35332648991705207 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.338 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.30560172872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Hotel/f91abb9a-6690-4fec-b1a7-f519dfe66d24.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Hotel/f91abb9a-6690-4fec-b1a7-f519dfe66d24.json deleted file mode 100644 index e913776d4505ff15a9c17a0715ad6fcf290c7bdc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Hotel/f91abb9a-6690-4fec-b1a7-f519dfe66d24.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Norquinal_Hotel/1762652579.788509", - "retrieved_timestamp": "1762652579.7885098", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Norquinal/Hotel", - "developer": "Norquinal", - "inference_platform": "unknown", - "id": "Norquinal/Hotel" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3215113676157041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36785702492059275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3156582446808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mistral-7B-DPO/877421ae-8135-485f-805e-489ed70dc886.json b/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mistral-7B-DPO/877421ae-8135-485f-805e-489ed70dc886.json deleted file mode 100644 index f5cd822c3b9be4dbdb7f08cf4492e4be8cc26adb..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mistral-7B-DPO/877421ae-8135-485f-805e-489ed70dc886.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-Mistral-7B-DPO/1762652579.7912042", - "retrieved_timestamp": "1762652579.7912052", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO", - "developer": "NousResearch", - "inference_platform": "unknown", - "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5762510139762497 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48526536654652347 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099697 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3999791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3015292553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mixtral-8x7B-DPO/bc2d14fe-000a-40ce-a57c-c00fe584a7e4.json b/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mixtral-8x7B-DPO/bc2d14fe-000a-40ce-a57c-c00fe584a7e4.json deleted file mode 100644 index 47c9b778617de8bc98abbd2b592dde312d9271c7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mixtral-8x7B-DPO/bc2d14fe-000a-40ce-a57c-c00fe584a7e4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-Mixtral-8x7B-DPO/1762652579.791439", - "retrieved_timestamp": "1762652579.7914398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF 
Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", - "developer": "NousResearch", - "inference_platform": "unknown", - "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5896898008395501 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5538851384033822 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4595416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3666057180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mixtral-8x7B-SFT/3c196d70-44ad-419c-8c4c-80fc7f184687.json b/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mixtral-8x7B-SFT/3c196d70-44ad-419c-8c4c-80fc7f184687.json deleted file mode 100644 index db5eed83d6bfda86ac93168d0669b58e2c9d0a1a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mixtral-8x7B-SFT/3c196d70-44ad-419c-8c4c-80fc7f184687.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-Mixtral-8x7B-SFT/1762652579.791643", - "retrieved_timestamp": "1762652579.7916439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", - "developer": "NousResearch", - "inference_platform": "unknown", - "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5730783210769648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5057868454026635 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.421375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30659906914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-SOLAR-10.7B/80a7b60b-77f7-4dbf-96c8-071c56179fec.json b/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-SOLAR-10.7B/80a7b60b-77f7-4dbf-96c8-071c56179fec.json deleted file mode 100644 index 63613e92ed13f5ed025b93ff79b3332a3fa85a46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-SOLAR-10.7B/80a7b60b-77f7-4dbf-96c8-071c56179fec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-SOLAR-10.7B/1762652579.791853", - "retrieved_timestamp": "1762652579.7918541", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Nous-Hermes-2-SOLAR-10.7B", - "developer": "NousResearch", - "inference_platform": "unknown", - "id": "NousResearch/Nous-Hermes-2-SOLAR-10.7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5278660620486975 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414294841140173 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3458277925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Yarn-Solar-10b-32k/a18a259d-1795-4848-94fd-3b9c3abfb9da.json b/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Yarn-Solar-10b-32k/a18a259d-1795-4848-94fd-3b9c3abfb9da.json deleted file mode 100644 index fdf37d0246c6fd3c937eca986000b9d996e1cb7d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Yarn-Solar-10b-32k/a18a259d-1795-4848-94fd-3b9c3abfb9da.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Solar-10b-32k/1762652579.793437", - "retrieved_timestamp": "1762652579.793438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Yarn-Solar-10b-32k", - "developer": "NousResearch", - "inference_platform": "unknown", - "id": "NousResearch/Yarn-Solar-10b-32k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19421579187666504 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4986859152325069 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4146458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32721077127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Yarn-Solar-10b-64k/1904c811-34ae-4f52-9978-622bc6dd6f2e.json b/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Yarn-Solar-10b-64k/1904c811-34ae-4f52-9978-622bc6dd6f2e.json deleted file mode 100644 index c57b84d30144f4eb257041c9a03b33c338575eb5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Yarn-Solar-10b-64k/1904c811-34ae-4f52-9978-622bc6dd6f2e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Solar-10b-64k/1762652579.793644", - "retrieved_timestamp": "1762652579.7936451", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Yarn-Solar-10b-64k", - "developer": "NousResearch", - "inference_platform": "unknown", - "id": "NousResearch/Yarn-Solar-10b-64k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1988867316498003 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49219907954226505 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40143750000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3148271276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.0 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_ASTAROTH-3.2-1B/e454276c-3113-49f8-9397-9c1ad5e7bcc5.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_ASTAROTH-3.2-1B/e454276c-3113-49f8-9397-9c1ad5e7bcc5.json deleted file mode 100644 index c140c09c913f41d1d829011beeabb2b02ac6930f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_ASTAROTH-3.2-1B/e454276c-3113-49f8-9397-9c1ad5e7bcc5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Novaciano_ASTAROTH-3.2-1B/1762652579.7938519", - "retrieved_timestamp": "1762652579.793853", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Novaciano/ASTAROTH-3.2-1B", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/ASTAROTH-3.2-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5612884923115112 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3542962056805596 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31421875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19090757978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_BLAST_PROCESSING-3.2-1B/61173be4-9a87-4dfa-812d-b414b4d2bccb.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_BLAST_PROCESSING-3.2-1B/61173be4-9a87-4dfa-812d-b414b4d2bccb.json deleted file mode 100644 index 589a169eb0ce4c24521fbdafd081d8ce95a20958..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_BLAST_PROCESSING-3.2-1B/61173be4-9a87-4dfa-812d-b414b4d2bccb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Novaciano_BLAST_PROCESSING-3.2-1B/1762652579.794129", - 
"retrieved_timestamp": "1762652579.7941298", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Novaciano/BLAST_PROCESSING-3.2-1B", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/BLAST_PROCESSING-3.2-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921783091087204 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3460318843168258 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19414893617021275 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Cerberus-3.2-1B/2d6ff76b-9d81-45a7-8768-6a240b5395ab.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Cerberus-3.2-1B/2d6ff76b-9d81-45a7-8768-6a240b5395ab.json deleted file mode 100644 index 6872e2b81e9cad8b44778c9a38da8cdb5e56b46f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Cerberus-3.2-1B/2d6ff76b-9d81-45a7-8768-6a240b5395ab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Novaciano_Cerberus-3.2-1B/1762652579.7945569", - "retrieved_timestamp": "1762652579.794559", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Novaciano/Cerberus-3.2-1B", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/Cerberus-3.2-1B" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5016877440746109 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4164937678626939 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32888541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1663065159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Cultist-3.2-1B/3dc51dce-222f-455b-b61a-04904c7fc855.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Cultist-3.2-1B/3dc51dce-222f-455b-b61a-04904c7fc855.json deleted file mode 100644 index d05e7d7598ec21e3977d8a5c8a4d398f44884d87..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Cultist-3.2-1B/3dc51dce-222f-455b-b61a-04904c7fc855.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Novaciano_Cultist-3.2-1B/1762652579.7949288", - "retrieved_timestamp": "1762652579.79493", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Novaciano/Cultist-3.2-1B", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/Cultist-3.2-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294895322189568 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3399311286410264 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3330104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17137632978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_FuseChat-3.2-1B-GRPO_Creative_RP/16a8882c-12f5-46d0-8e1f-88b22aa8f08c.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_FuseChat-3.2-1B-GRPO_Creative_RP/16a8882c-12f5-46d0-8e1f-88b22aa8f08c.json deleted file mode 100644 index db9cccd9ee19800cb7a6785275b4339fdc7c2acd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_FuseChat-3.2-1B-GRPO_Creative_RP/16a8882c-12f5-46d0-8e1f-88b22aa8f08c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Novaciano_FuseChat-3.2-1B-GRPO_Creative_RP/1762652579.795153", - "retrieved_timestamp": "1762652579.795153", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.559814625194484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3487816706572648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33288541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17345412234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Fusetrix-3.2-1B-GRPO_RP_Creative/7fe4c32b-0bbd-49c0-9e4f-43306457aae8.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Fusetrix-3.2-1B-GRPO_RP_Creative/7fe4c32b-0bbd-49c0-9e4f-43306457aae8.json deleted file mode 100644 index 5295e689b15da794f06ff2cefd10f0e0968d652d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Fusetrix-3.2-1B-GRPO_RP_Creative/7fe4c32b-0bbd-49c0-9e4f-43306457aae8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Novaciano_Fusetrix-3.2-1B-GRPO_RP_Creative/1762652579.795362", - "retrieved_timestamp": "1762652579.795362", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366339091388627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3434595088038714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3209166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17578125 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - 
} -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_HarmfulProject-3.2-1B/99b31db9-55f8-41c2-9eb9-f21511deccf0.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_HarmfulProject-3.2-1B/99b31db9-55f8-41c2-9eb9-f21511deccf0.json deleted file mode 100644 index 4fd9b12359e8890a13494b7212bebbc009c1fc20..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_HarmfulProject-3.2-1B/99b31db9-55f8-41c2-9eb9-f21511deccf0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Novaciano_HarmfulProject-3.2-1B/1762652579.7958348", - "retrieved_timestamp": "1762652579.795836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Novaciano/HarmfulProject-3.2-1B", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/HarmfulProject-3.2-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873821460391761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32744993658117816 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.341875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18226396276595744 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_LEWD-Mental-Cultist-3.2-1B/1bce579e-9fac-46a9-92ef-48080832abbb.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_LEWD-Mental-Cultist-3.2-1B/1bce579e-9fac-46a9-92ef-48080832abbb.json deleted file mode 100644 index c61fb8125476e9c61af6974f78011613a7799350..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_LEWD-Mental-Cultist-3.2-1B/1bce579e-9fac-46a9-92ef-48080832abbb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/Novaciano_LEWD-Mental-Cultist-3.2-1B/1762652579.796045", - "retrieved_timestamp": "1762652579.796046", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Novaciano/LEWD-Mental-Cultist-3.2-1B", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/LEWD-Mental-Cultist-3.2-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5308636639671627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35127188813594756 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32228125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1768617021276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_La_Mejor_Mezcla-3.2-1B/49fef1c9-bf18-465c-acdb-b8f17e93dbad.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_La_Mejor_Mezcla-3.2-1B/49fef1c9-bf18-465c-acdb-b8f17e93dbad.json deleted file mode 100644 index 18eb35e54ab0e2430b61cababea653e6c16e743c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_La_Mejor_Mezcla-3.2-1B/49fef1c9-bf18-465c-acdb-b8f17e93dbad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Novaciano_La_Mejor_Mezcla-3.2-1B/1762652579.79625", - "retrieved_timestamp": "1762652579.7962508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Novaciano/La_Mejor_Mezcla-3.2-1B", - "developer": "Novaciano", - 
"inference_platform": "unknown", - "id": "Novaciano/La_Mejor_Mezcla-3.2-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5509969104199081 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34879364478381225 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18292885638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Sigil-Of-Satan-3.2-1B/ae9ceba0-8e8a-431f-a762-7bb6c55b4757.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Sigil-Of-Satan-3.2-1B/ae9ceba0-8e8a-431f-a762-7bb6c55b4757.json deleted file mode 100644 index 632aaafe6be4e0175a9b4f1c1dddf545698eb81c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Sigil-Of-Satan-3.2-1B/ae9ceba0-8e8a-431f-a762-7bb6c55b4757.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Novaciano_Sigil-Of-Satan-3.2-1B/1762652579.7964501", - "retrieved_timestamp": "1762652579.7964501", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Novaciano/Sigil-Of-Satan-3.2-1B", - "developer": "Novaciano", - "inference_platform": "unknown", - "id": "Novaciano/Sigil-Of-Satan-3.2-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5494233079340594 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.3545862332731657 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3276145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18550531914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/NucleusAI/NucleusAI_nucleus-22B-token-500B/f18c51de-f5eb-4986-8c44-35bd71db5e8b.json b/leaderboard_data/HFOpenLLMv2/NucleusAI/NucleusAI_nucleus-22B-token-500B/f18c51de-f5eb-4986-8c44-35bd71db5e8b.json deleted file mode 100644 index 6c331a0c72a8f15a04fa5fe485e8e0b20087e7ab..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/NucleusAI/NucleusAI_nucleus-22B-token-500B/f18c51de-f5eb-4986-8c44-35bd71db5e8b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NucleusAI_nucleus-22B-token-500B/1762652579.7966561", - "retrieved_timestamp": "1762652579.7966561", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NucleusAI/nucleus-22B-token-500B", - "developer": "NucleusAI", - "inference_platform": "unknown", - "id": "NucleusAI/nucleus-22B-token-500B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.025654153202391873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29198007801214715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3510520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11619015957446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.828 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI-15B/4ffdc303-b5e4-45f0-839c-432f04dc5d57.json b/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI-15B/4ffdc303-b5e4-45f0-839c-432f04dc5d57.json deleted file mode 100644 index e81c64c73140a3358ada28c1821c33ba55420f6f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI-15B/4ffdc303-b5e4-45f0-839c-432f04dc5d57.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI-15B/1762652579.797408", - "retrieved_timestamp": "1762652579.797409", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OEvortex/HelpingAI-15B", - "developer": "OEvortex", - "inference_platform": "unknown", - "id": "OEvortex/HelpingAI-15B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2030091268944179 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936006977853758 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.361875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 15.323 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI-3B-reloaded/628026b2-efc1-4592-a85b-f5d2ea1dc1dd.json b/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI-3B-reloaded/628026b2-efc1-4592-a85b-f5d2ea1dc1dd.json deleted file mode 100644 index 4e5348434a95436be5ca4db422e5d5d6cf4a9419..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI-3B-reloaded/628026b2-efc1-4592-a85b-f5d2ea1dc1dd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI-3B-reloaded/1762652579.797647", - "retrieved_timestamp": "1762652579.797647", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OEvortex/HelpingAI-3B-reloaded", - "developer": "OEvortex", - "inference_platform": "unknown", - "id": "OEvortex/HelpingAI-3B-reloaded" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46466819150963884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4128512897904065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3524479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25947473404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.81 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI2-9B/d04d6474-5784-4492-8347-a2bc03eca6ba.json b/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI2-9B/d04d6474-5784-4492-8347-a2bc03eca6ba.json deleted file mode 100644 index 74afc67f4e08782cef751deecb0b4dcc6bc2fee8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI2-9B/d04d6474-5784-4492-8347-a2bc03eca6ba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI2-9B/1762652579.797843", - "retrieved_timestamp": 
"1762652579.797844", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OEvortex/HelpingAI2-9B", - "developer": "OEvortex", - "inference_platform": "unknown", - "id": "OEvortex/HelpingAI2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44131238447319776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4844617641983123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3710833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28997672872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.903 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI2.5-10B/6a41fcba-f13d-4839-8a91-ff3f18de5114.json b/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI2.5-10B/6a41fcba-f13d-4839-8a91-ff3f18de5114.json deleted file mode 100644 index 1cffa2224801c090fdd40153b39c61dacd724f60..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI2.5-10B/6a41fcba-f13d-4839-8a91-ff3f18de5114.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI2.5-10B/1762652579.798051", - "retrieved_timestamp": "1762652579.798051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OEvortex/HelpingAI2.5-10B", - "developer": "OEvortex", - "inference_platform": "unknown", - "id": "OEvortex/HelpingAI2.5-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32765617450586665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4495657491171711 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37381250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25748005319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.211 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OliveiraJLT/OliveiraJLT_Sagui-7B-Instruct-v0.1/d5135349-0757-469d-8ad3-80ef56d1f7de.json b/leaderboard_data/HFOpenLLMv2/OliveiraJLT/OliveiraJLT_Sagui-7B-Instruct-v0.1/d5135349-0757-469d-8ad3-80ef56d1f7de.json deleted file mode 100644 index ac43c436e04615cc1dae9e95d836c2ab9cf8976b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OliveiraJLT/OliveiraJLT_Sagui-7B-Instruct-v0.1/d5135349-0757-469d-8ad3-80ef56d1f7de.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OliveiraJLT_Sagui-7B-Instruct-v0.1/1762652579.798249", - "retrieved_timestamp": "1762652579.798249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OliveiraJLT/Sagui-7B-Instruct-v0.1", - "developer": "OliveiraJLT", - "inference_platform": "unknown", - "id": "OliveiraJLT/Sagui-7B-Instruct-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28916275482386733 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3110678914743868 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4190520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14852061170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Omkar1102/Omkar1102_code-yi/2609af14-3cff-4b19-9741-e1caca56f58a.json b/leaderboard_data/HFOpenLLMv2/Omkar1102/Omkar1102_code-yi/2609af14-3cff-4b19-9741-e1caca56f58a.json deleted file mode 100644 index 7209bd367e9b72e9241a7eec999f63cfbc1a6b03..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Omkar1102/Omkar1102_code-yi/2609af14-3cff-4b19-9741-e1caca56f58a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Omkar1102_code-yi/1762652579.79849", - "retrieved_timestamp": "1762652579.7984908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Omkar1102/code-yi", - "developer": "Omkar1102", - "inference_platform": "unknown", - "id": "Omkar1102/code-yi" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21477457590304835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2760062695877461 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3802291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11261635638297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.084 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Omkar1102/Omkar1102_code-yi/3edef2ec-9fad-45ba-8fde-4af5c4f24d69.json b/leaderboard_data/HFOpenLLMv2/Omkar1102/Omkar1102_code-yi/3edef2ec-9fad-45ba-8fde-4af5c4f24d69.json deleted file mode 100644 index e86f6a4351154a8040b0b46baba330a82a06168c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Omkar1102/Omkar1102_code-yi/3edef2ec-9fad-45ba-8fde-4af5c4f24d69.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Omkar1102_code-yi/1762652579.798722", - "retrieved_timestamp": "1762652579.798723", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Omkar1102/code-yi", - "developer": "Omkar1102", - "inference_platform": "unknown", - "id": "Omkar1102/code-yi" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2254407195131141 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2750025242693941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3761979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.084 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OmnicromsBrain/OmnicromsBrain_NeuralStar_FusionWriter_4x7b/65ba6556-712c-42cc-817b-ad8c2014dc4c.json 
b/leaderboard_data/HFOpenLLMv2/OmnicromsBrain/OmnicromsBrain_NeuralStar_FusionWriter_4x7b/65ba6556-712c-42cc-817b-ad8c2014dc4c.json deleted file mode 100644 index c0fa68afa9390e770f49da08d683246f96e7f868..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OmnicromsBrain/OmnicromsBrain_NeuralStar_FusionWriter_4x7b/65ba6556-712c-42cc-817b-ad8c2014dc4c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OmnicromsBrain_NeuralStar_FusionWriter_4x7b/1762652579.7988968", - "retrieved_timestamp": "1762652579.798898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OmnicromsBrain/NeuralStar_FusionWriter_4x7b", - "developer": "OmnicromsBrain", - "inference_platform": "unknown", - "id": "OmnicromsBrain/NeuralStar_FusionWriter_4x7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5963842604289951 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47762434766958123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2605551861702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OnlyCheeini/OnlyCheeini_greesychat-turbo/f3a7f01c-2893-4887-a210-d126d9135edf.json b/leaderboard_data/HFOpenLLMv2/OnlyCheeini/OnlyCheeini_greesychat-turbo/f3a7f01c-2893-4887-a210-d126d9135edf.json deleted file mode 100644 index 1d25216ee37ac21e2dd445f35f0643dbe71c0656..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OnlyCheeini/OnlyCheeini_greesychat-turbo/f3a7f01c-2893-4887-a210-d126d9135edf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OnlyCheeini_greesychat-turbo/1762652579.7991328", - "retrieved_timestamp": 
"1762652579.799134", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OnlyCheeini/greesychat-turbo", - "developer": "OnlyCheeini", - "inference_platform": "unknown", - "id": "OnlyCheeini/greesychat-turbo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023256071667619692 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30921339082318816 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3314270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11377992021276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenAssistant/OpenAssistant_oasst-sft-1-pythia-12b/ba1129fd-f158-47ad-b194-7cff794b9ef2.json b/leaderboard_data/HFOpenLLMv2/OpenAssistant/OpenAssistant_oasst-sft-1-pythia-12b/ba1129fd-f158-47ad-b194-7cff794b9ef2.json deleted file mode 100644 index d7f561ee390bf8ffc73929b15884127e31e607b8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenAssistant/OpenAssistant_oasst-sft-1-pythia-12b/ba1129fd-f158-47ad-b194-7cff794b9ef2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenAssistant_oasst-sft-1-pythia-12b/1762652579.799746", - "retrieved_timestamp": "1762652579.799747", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenAssistant/oasst-sft-1-pythia-12b", - "developer": "OpenAssistant", - "inference_platform": "unknown", - "id": "OpenAssistant/oasst-sft-1-pythia-12b" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10553885911603435 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.314662875941371 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33269791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11128656914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 12.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-falcon3-10b-v24.2-131k/19bba814-812c-49c2-acf1-9d056fd7d62d.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-falcon3-10b-v24.2-131k/19bba814-812c-49c2-acf1-9d056fd7d62d.json deleted file mode 100644 index 887166c821026052354feaabd50ef8e88d54b511..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-falcon3-10b-v24.2-131k/19bba814-812c-49c2-acf1-9d056fd7d62d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-falcon3-10b-v24.2-131k/1762652579.800029", - "retrieved_timestamp": "1762652579.80003", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-falcon3-10b-v24.2-131k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-falcon3-10b-v24.2-131k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5086315420861093 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.6003725722032135 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41864583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3833942819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.34 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-mixtral-7bx8-v18.1-32k/247ee47c-e441-4020-97e3-14e3ed8d22c9.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-mixtral-7bx8-v18.1-32k/247ee47c-e441-4020-97e3-14e3ed8d22c9.json deleted file mode 100644 index 6a881da486e3f69b2761e0ef343e3369ec721464..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-mixtral-7bx8-v18.1-32k/247ee47c-e441-4020-97e3-14e3ed8d22c9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-mixtral-7bx8-v18.1-32k/1762652579.803262", - "retrieved_timestamp": "1762652579.803263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.549347952322061 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46561770563515265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3830520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38040226063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.741 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-nemotron-70b-v23.1-131k/e4e4d8f4-7e49-4b08-8a08-97e4e2c28616.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-nemotron-70b-v23.1-131k/e4e4d8f4-7e49-4b08-8a08-97e4e2c28616.json deleted file mode 100644 index 304c931c9930b73cea826e3d5d8c2620610f8f43..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-nemotron-70b-v23.1-131k/e4e4d8f4-7e49-4b08-8a08-97e4e2c28616.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-nemotron-70b-v23.1-131k/1762652579.803536", - "retrieved_timestamp": "1762652579.803537", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-nemotron-70b-v23.1-131k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-nemotron-70b-v23.1-131k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7555275557742346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6749472828128272 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32099697885196377 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45375000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5174534574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-nemotron-70b-v23.2-131k/b34ca7d7-6049-4f4f-a2e3-db736009fa4d.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-nemotron-70b-v23.2-131k/b34ca7d7-6049-4f4f-a2e3-db736009fa4d.json deleted file mode 100644 index 62840bf59d9cdbfc3eae4c037d6b5f15f9229da9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-nemotron-70b-v23.2-131k/b34ca7d7-6049-4f4f-a2e3-db736009fa4d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-nemotron-70b-v23.2-131k/1762652579.803802", - "retrieved_timestamp": "1762652579.803806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-nemotron-70b-v23.2-131k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-nemotron-70b-v23.2-131k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7226547782107031 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6704805157570325 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3598993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46959375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5120511968085106 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-qwq-32b-v24.1-200k/a2b990cd-e692-44fc-8b39-ac91eab85cef.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-qwq-32b-v24.1-200k/a2b990cd-e692-44fc-8b39-ac91eab85cef.json deleted file mode 100644 
index 0ce31864bbd209f6ca3695470f8673cd8a56e7bf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-qwq-32b-v24.1-200k/a2b990cd-e692-44fc-8b39-ac91eab85cef.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwq-32b-v24.1-200k/1762652579.804893", - "retrieved_timestamp": "1762652579.804894", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-qwq-32b-v24.1-200k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-qwq-32b-v24.1-200k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.593661484860171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6798496773637743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37386706948640486 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.484875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5490359042553191 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-qwq-32b-v24.2-200k/24684939-5eb8-40b1-99dd-1ebe693680fc.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-qwq-32b-v24.2-200k/24684939-5eb8-40b1-99dd-1ebe693680fc.json deleted file mode 100644 index b7a7e5769990c4d34db5e74d76013f8783bea953..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-qwq-32b-v24.2-200k/24684939-5eb8-40b1-99dd-1ebe693680fc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwq-32b-v24.2-200k/1762652579.8051221", - "retrieved_timestamp": "1762652579.8051221", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-qwq-32b-v24.2-200k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-qwq-32b-v24.2-200k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5969837808126881 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6771537576509328 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776435045317221 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47179166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5446309840425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-yi1.5-34b-v21.3-32k/f6a36220-0b31-4b0d-9262-7e0e508e64db.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-yi1.5-34b-v21.3-32k/f6a36220-0b31-4b0d-9262-7e0e508e64db.json deleted file mode 100644 index 8374c4fcf570ad340dbca360b344e1b62af33ac4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-yi1.5-34b-v21.3-32k/f6a36220-0b31-4b0d-9262-7e0e508e64db.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-yi1.5-34b-v21.3-32k/1762652579.8053398", - "retrieved_timestamp": "1762652579.805341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5420041046645123 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6162574860411373 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44394791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4599401595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.407 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-14b-v22.3-32k/0e288116-902d-4fef-9020-a3a4dc80e698.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-14b-v22.3-32k/0e288116-902d-4fef-9020-a3a4dc80e698.json deleted file mode 100644 index 532f084c45e32a4e9ca0453b359b04dc3252ba2c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-14b-v22.3-32k/0e288116-902d-4fef-9020-a3a4dc80e698.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-zero-14b-v22.3-32k/1762652579.805548", - "retrieved_timestamp": "1762652579.8055491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-zero-14b-v22.3-32k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-zero-14b-v22.3-32k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37529200299649373 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4859759816473639 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41660416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187333776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.022 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-3b-v21.2-32k/9d135662-43d6-4b05-90cb-5d2c856b0b89.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-3b-v21.2-32k/9d135662-43d6-4b05-90cb-5d2c856b0b89.json deleted file mode 100644 index 22cd165be67f79fc8d5056dd63fad221ebe1b262..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-3b-v21.2-32k/9d135662-43d6-4b05-90cb-5d2c856b0b89.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-zero-3b-v21.2-32k/1762652579.8057752", - "retrieved_timestamp": "1762652579.8057752", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-zero-3b-v21.2-32k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-zero-3b-v21.2-32k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3802377691192702 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934791831798414 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3566354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20337433510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.769 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-56b-v21.2-32k/7636a893-1404-4257-9778-653f3cfb601b.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-56b-v21.2-32k/7636a893-1404-4257-9778-653f3cfb601b.json deleted file mode 100644 index 2e4286ec7cdc9d408f1a6bc0ded75d3d2988638f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-56b-v21.2-32k/7636a893-1404-4257-9778-653f3cfb601b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-zero-56b-v21.2-32k/1762652579.8059928", - "retrieved_timestamp": "1762652579.805994", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-zero-56b-v21.2-32k", - "developer": "OpenBuddy", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-zero-56b-v21.2-32k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5057092957796425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6128345897750148 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16238670694864046 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4305208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43991023936170215 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 56.707 - } -} \ No newline at end of 
file diff --git a/leaderboard_data/HFOpenLLMv2/OpenGenerativeAI/OpenGenerativeAI_Bifrost-14B/cde00174-ac52-42da-9641-0866739232e4.json b/leaderboard_data/HFOpenLLMv2/OpenGenerativeAI/OpenGenerativeAI_Bifrost-14B/cde00174-ac52-42da-9641-0866739232e4.json deleted file mode 100644 index 9428cbae554bba346791fb2957d0defda55d4907..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenGenerativeAI/OpenGenerativeAI_Bifrost-14B/cde00174-ac52-42da-9641-0866739232e4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenGenerativeAI_Bifrost-14B/1762652579.806474", - "retrieved_timestamp": "1762652579.806475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenGenerativeAI/Bifrost-14B", - "developer": "OpenGenerativeAI", - "inference_platform": "unknown", - "id": "OpenGenerativeAI/Bifrost-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6615302951723648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6844897889249308 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23564954682779457 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46239583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5073969414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenGenerativeAI/OpenGenerativeAI_Bifrost/cef8e01a-071d-4ee4-997b-44679ef5b56e.json b/leaderboard_data/HFOpenLLMv2/OpenGenerativeAI/OpenGenerativeAI_Bifrost/cef8e01a-071d-4ee4-997b-44679ef5b56e.json deleted file mode 100644 index 3ec1da6089cf7a4d38c6559c60137beb65bd4fa4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenGenerativeAI/OpenGenerativeAI_Bifrost/cef8e01a-071d-4ee4-997b-44679ef5b56e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/OpenGenerativeAI_Bifrost/1762652579.8062131", - "retrieved_timestamp": "1762652579.8062139", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenGenerativeAI/Bifrost", - "developer": "OpenGenerativeAI", - "inference_platform": "unknown", - "id": "OpenGenerativeAI/Bifrost" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6347524568145853 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6849273974523276 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2545317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45976041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5159574468085106 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct-human-data/26787f2b-8f30-4cc8-b39e-447b8c53aa85.json b/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct-human-data/26787f2b-8f30-4cc8-b39e-447b8c53aa85.json deleted file mode 100644 index 6770c161de940ac64c5f75657a96077750b9f66c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct-human-data/26787f2b-8f30-4cc8-b39e-447b8c53aa85.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B-Instruct-human-data/1762652579.8072178", - "retrieved_timestamp": "1762652579.807219", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenLLM-France/Lucie-7B-Instruct-human-data", - 
"developer": "OpenLLM-France", - "inference_platform": "unknown", - "id": "OpenLLM-France/Lucie-7B-Instruct-human-data" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29460830596151544 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32842533479733 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37285416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14295212765957446 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct-v1.1/e94a0550-93fa-448a-a4a4-187fd1b7d24e.json b/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct-v1.1/e94a0550-93fa-448a-a4a4-187fd1b7d24e.json deleted file mode 100644 index e185a959fa4bb3ef2bb6445df05c7048afe34f68..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct-v1.1/e94a0550-93fa-448a-a4a4-187fd1b7d24e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B-Instruct-v1.1/1762652579.807442", - "retrieved_timestamp": "1762652579.807442", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenLLM-France/Lucie-7B-Instruct-v1.1", - "developer": "OpenLLM-France", - "inference_platform": "unknown", - "id": "OpenLLM-France/Lucie-7B-Instruct-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3038759380665523 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38158765227444885 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37502083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1864195478723404 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct/af17be77-0ae3-4b90-ba85-a4886450cd43.json b/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct/af17be77-0ae3-4b90-ba85-a4886450cd43.json deleted file mode 100644 index ad2f63b72698151bdb18e82828088d3941f55760..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct/af17be77-0ae3-4b90-ba85-a4886450cd43.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B-Instruct/1762652579.806944", - "retrieved_timestamp": "1762652579.806945", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenLLM-France/Lucie-7B-Instruct", - "developer": "OpenLLM-France", - "inference_platform": "unknown", - "id": "OpenLLM-France/Lucie-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.279645784296777 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3254036581260458 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36621875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15558510638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B/01e4cd19-4f1f-4c30-b80f-e1d287d5d7c2.json b/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B/01e4cd19-4f1f-4c30-b80f-e1d287d5d7c2.json deleted file mode 100644 index 15a48f9e0d38b275c7d891d32bf5fb1e12df15ee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B/01e4cd19-4f1f-4c30-b80f-e1d287d5d7c2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B/1762652579.806693", - "retrieved_timestamp": "1762652579.8066938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenLLM-France/Lucie-7B", - "developer": "OpenLLM-France", - "inference_platform": "unknown", - "id": "OpenLLM-France/Lucie-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24964538535530173 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3492469872973046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39232291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.14976728723404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Orion-zhen/Orion-zhen_Qwen2.5-7B-Instruct-Uncensored/141239bb-c7e3-4c38-b289-12cd59f592d2.json b/leaderboard_data/HFOpenLLMv2/Orion-zhen/Orion-zhen_Qwen2.5-7B-Instruct-Uncensored/141239bb-c7e3-4c38-b289-12cd59f592d2.json deleted file mode 100644 index 88d90a83011223ec30fb304c31b2dc8af853e8c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Orion-zhen/Orion-zhen_Qwen2.5-7B-Instruct-Uncensored/141239bb-c7e3-4c38-b289-12cd59f592d2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Orion-zhen_Qwen2.5-7B-Instruct-Uncensored/1762652579.808624", - "retrieved_timestamp": "1762652579.808625", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored", - "developer": "Orion-zhen", - "inference_platform": "unknown", - "id": "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7204317876567508 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5473918652157296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4773413897280967 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43613541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4426529255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/P0x0/P0x0_Astra-v1-12B/349ae5f5-55d0-4486-a6dc-2b5644fac045.json b/leaderboard_data/HFOpenLLMv2/P0x0/P0x0_Astra-v1-12B/349ae5f5-55d0-4486-a6dc-2b5644fac045.json deleted file mode 100644 index 6df3f43b931ab75b4aabd09bcd46544ba64291b1..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/P0x0/P0x0_Astra-v1-12B/349ae5f5-55d0-4486-a6dc-2b5644fac045.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/P0x0_Astra-v1-12B/1762652579.8091059", - "retrieved_timestamp": "1762652579.8091059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "P0x0/Astra-v1-12B", - "developer": "P0x0", - "inference_platform": "unknown", - "id": "P0x0/Astra-v1-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28059437847134494 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5214506484138984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4051875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3460771276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_L3.2-Instruct-Thinking-v0.1-1B/3c942d2f-0b53-498e-ab05-71d5075cb974.json b/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_L3.2-Instruct-Thinking-v0.1-1B/3c942d2f-0b53-498e-ab05-71d5075cb974.json deleted file mode 100644 index 60be403926202b4e22ae0ca1438a53af85bac341..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_L3.2-Instruct-Thinking-v0.1-1B/3c942d2f-0b53-498e-ab05-71d5075cb974.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_L3.2-Instruct-Thinking-v0.1-1B/1762652579.8095942", - "retrieved_timestamp": "1762652579.8095949", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging 
Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B", - "developer": "PJMixers-Dev", - "inference_platform": "unknown", - "id": "PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46276989498973836 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33018063718974094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32621875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14827127659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/fb66b283-bfd6-4437-95b7-d74a0d8d2814.json b/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/fb66b283-bfd6-4437-95b7-d74a0d8d2814.json deleted file mode 100644 index bf7148a99291f94e3c438830f4edd551e7bed424..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/fb66b283-bfd6-4437-95b7-d74a0d8d2814.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/1762652579.809847", - "retrieved_timestamp": "1762652579.809848", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B", - "developer": "PJMixers-Dev", - "inference_platform": "unknown", - "id": "PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7871015572015585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5073267838961463 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2001510574018127 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3869895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767453457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.047 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1d91cdce-0bdb-4567-9296-6225db3aa0bc.json b/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1d91cdce-0bdb-4567-9296-6225db3aa0bc.json deleted file mode 100644 index 4e906300c92f39433ff936340ef8e34d6ccb52cc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1d91cdce-0bdb-4567-9296-6225db3aa0bc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1762652579.8105159", - "retrieved_timestamp": "1762652579.810517", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B", - "developer": "PJMixers-Dev", - "inference_platform": "unknown", - "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.693054428915278 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4556166737589294 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match 
on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37003125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.312749335106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/d1875dfd-05ab-4a49-8c7f-02cddf35a695.json b/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/d1875dfd-05ab-4a49-8c7f-02cddf35a695.json deleted file mode 100644 index df463114b5036820c8e69b4277c6de1756420faf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/d1875dfd-05ab-4a49-8c7f-02cddf35a695.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/1762652579.810729", - "retrieved_timestamp": "1762652579.81073", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B", - "developer": "PJMixers-Dev", - "inference_platform": "unknown", - "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6291573026237051 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45814952191015346 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.365875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3115026595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/62b12d95-1da2-407c-8552-8c5e951c5c85.json b/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/62b12d95-1da2-407c-8552-8c5e951c5c85.json deleted file mode 100644 index 8461c682266fd5e68aa95b7a907ab862e475f479..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/62b12d95-1da2-407c-8552-8c5e951c5c85.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/1762652579.8109388", - "retrieved_timestamp": "1762652579.8109398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B", - "developer": "PJMixers-Dev", - "inference_platform": "unknown", - "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6503898544750152 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45107942950222196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3687291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3107546542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/56f36430-4bb1-425d-ac4b-30d85237667c.json b/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/56f36430-4bb1-425d-ac4b-30d85237667c.json deleted file mode 100644 index 22cc4533d19cbac1f35c0a07d4bf3becf0920f16..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/56f36430-4bb1-425d-ac4b-30d85237667c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/1762652579.8111491", - "retrieved_timestamp": "1762652579.81115", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B", - "developer": "PJMixers-Dev", - "inference_platform": "unknown", - "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5040858256093831 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4483158594793648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3515520833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.308344414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Parissa3/Parissa3_test-model/53cb44c7-f7bc-40fa-88e7-511b9dfab004.json 
b/leaderboard_data/HFOpenLLMv2/Parissa3/Parissa3_test-model/53cb44c7-f7bc-40fa-88e7-511b9dfab004.json deleted file mode 100644 index f200d621a8958f8ddb3aea03d67bf6095596d2a7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Parissa3/Parissa3_test-model/53cb44c7-f7bc-40fa-88e7-511b9dfab004.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Parissa3_test-model/1762652579.811859", - "retrieved_timestamp": "1762652579.81186", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Parissa3/test-model", - "developer": "Parissa3", - "inference_platform": "unknown", - "id": "Parissa3/test-model" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882564927725103 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193916761801759 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46853125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3056848404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_SuperThoughts-CoT-14B-16k-o1-QwQ/c604f0fb-517d-45db-9e1c-6c911bce43e7.json b/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_SuperThoughts-CoT-14B-16k-o1-QwQ/c604f0fb-517d-45db-9e1c-6c911bce43e7.json deleted file mode 100644 index 6b1077cfc1a27c367c934b6d3caabdfdee75d643..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_SuperThoughts-CoT-14B-16k-o1-QwQ/c604f0fb-517d-45db-9e1c-6c911bce43e7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Pinkstack_SuperThoughts-CoT-14B-16k-o1-QwQ/1762652579.812447", - "retrieved_timestamp": "1762652579.812449", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ", - "developer": "Pinkstack", - "inference_platform": "unknown", - "id": "Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.051457909458015844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6719989821162488 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4199395770392749 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4913541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.526845079787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_Superthoughts-lite-1.8B-experimental-o1/fba2ce2f-6c30-4af9-ae3a-d23f39f3f963.json b/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_Superthoughts-lite-1.8B-experimental-o1/fba2ce2f-6c30-4af9-ae3a-d23f39f3f963.json deleted file mode 100644 index 429cf3f9aeb80fc81db8fa9086db51a6f80fb368..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_Superthoughts-lite-1.8B-experimental-o1/fba2ce2f-6c30-4af9-ae3a-d23f39f3f963.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Pinkstack_Superthoughts-lite-1.8B-experimental-o1/1762652579.81273", - "retrieved_timestamp": "1762652579.81273", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Pinkstack/Superthoughts-lite-1.8B-experimental-o1", - "developer": "Pinkstack", - "inference_platform": "unknown", - "id": 
"Pinkstack/Superthoughts-lite-1.8B-experimental-o1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0375193375798437 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3434736647957908 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33539583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18508976063829788 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.812 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_Superthoughts-lite-v1/ff308837-dc35-4257-a4cd-de463feb733e.json b/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_Superthoughts-lite-v1/ff308837-dc35-4257-a4cd-de463feb733e.json deleted file mode 100644 index 853df06e6858a6cc43b7e5ac6d6c73d5de2ce043..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_Superthoughts-lite-v1/ff308837-dc35-4257-a4cd-de463feb733e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Pinkstack_Superthoughts-lite-v1/1762652579.812961", - "retrieved_timestamp": "1762652579.812962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Pinkstack/Superthoughts-lite-v1", - "developer": "Pinkstack", - "inference_platform": "unknown", - "id": "Pinkstack/Superthoughts-lite-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1658643510330368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3465571905256149 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3671770833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17553191489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-Instruct-CoreCurriculum-12b/d8145a39-f1d0-4b6e-958b-a96585eeec9f.json b/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-Instruct-CoreCurriculum-12b/d8145a39-f1d0-4b6e-958b-a96585eeec9f.json deleted file mode 100644 index 05ae87b104df80c2f5792134db29b0940d1b6a22..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-Instruct-CoreCurriculum-12b/d8145a39-f1d0-4b6e-958b-a96585eeec9f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-Instruct-CoreCurriculum-12b/1762652579.81328", - "retrieved_timestamp": "1762652579.813282", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PocketDoc/Dans-Instruct-CoreCurriculum-12b", - "developer": "PocketDoc", - "inference_platform": "unknown", - "id": "PocketDoc/Dans-Instruct-CoreCurriculum-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21914520139895477 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3788739075240266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4095625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1219248670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-V1.1.0-12b/c005ab13-1d42-4e28-802e-12438aab35a4.json b/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-V1.1.0-12b/c005ab13-1d42-4e28-802e-12438aab35a4.json deleted file mode 100644 index c5296ef42f5cbe90845846bd940009cfa54ee7f9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-V1.1.0-12b/c005ab13-1d42-4e28-802e-12438aab35a4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-PersonalityEngine-V1.1.0-12b/1762652579.813654", - "retrieved_timestamp": "1762652579.8136551", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PocketDoc/Dans-PersonalityEngine-V1.1.0-12b", - "developer": "PocketDoc", - "inference_platform": "unknown", - "id": "PocketDoc/Dans-PersonalityEngine-V1.1.0-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7074672978807343 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5361046243199591 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45867708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.32621343085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-V1.2.0-24b/38dd1b21-b357-4daf-94b3-c4a28809e56c.json b/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-V1.2.0-24b/38dd1b21-b357-4daf-94b3-c4a28809e56c.json deleted file mode 100644 index 5a9b6f084f4ca0039a2a77248b80ef81dadd0362..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-V1.2.0-24b/38dd1b21-b357-4daf-94b3-c4a28809e56c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-PersonalityEngine-V1.2.0-24b/1762652579.813962", - "retrieved_timestamp": "1762652579.813962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PocketDoc/Dans-PersonalityEngine-V1.2.0-24b", - "developer": "PocketDoc", - "inference_platform": "unknown", - "id": "PocketDoc/Dans-PersonalityEngine-V1.2.0-24b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7886252920029965 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6421213844206719 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24546827794561935 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42996875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5025764627659575 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-v1.0.0-8b/f3623b9f-3e3f-4b7b-a9f5-f0a15bf26f48.json b/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-v1.0.0-8b/f3623b9f-3e3f-4b7b-a9f5-f0a15bf26f48.json deleted file mode 100644 index 
ae08b2e868ab1975346909b39601a119db23f60d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-v1.0.0-8b/f3623b9f-3e3f-4b7b-a9f5-f0a15bf26f48.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-PersonalityEngine-v1.0.0-8b/1762652579.814201", - "retrieved_timestamp": "1762652579.814202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PocketDoc/Dans-PersonalityEngine-v1.0.0-8b", - "developer": "PocketDoc", - "inference_platform": "unknown", - "id": "PocketDoc/Dans-PersonalityEngine-v1.0.0-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.498190357141274 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47325544259149366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3065159574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-SakuraKaze-V1.0.0-12b/b78ef40e-91b1-401d-9576-1ac2f600b32a.json b/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-SakuraKaze-V1.0.0-12b/b78ef40e-91b1-401d-9576-1ac2f600b32a.json deleted file mode 100644 index 529f1f5678785beebd9d3a193fcc172e680682ba..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-SakuraKaze-V1.0.0-12b/b78ef40e-91b1-401d-9576-1ac2f600b32a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-SakuraKaze-V1.0.0-12b/1762652579.81442", - "retrieved_timestamp": "1762652579.81442", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PocketDoc/Dans-SakuraKaze-V1.0.0-12b", - "developer": "PocketDoc", - "inference_platform": "unknown", - "id": "PocketDoc/Dans-SakuraKaze-V1.0.0-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6520133246452745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5405357251132225 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47452083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35596742021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PowerInfer/PowerInfer_SmallThinker-3B-Preview/6613aff7-8f26-4b74-b08b-37fbd7990e42.json b/leaderboard_data/HFOpenLLMv2/PowerInfer/PowerInfer_SmallThinker-3B-Preview/6613aff7-8f26-4b74-b08b-37fbd7990e42.json deleted file mode 100644 index d5bbfefc0dc1fa8c6891e69fd6b58ecbdc2596a7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PowerInfer/PowerInfer_SmallThinker-3B-Preview/6613aff7-8f26-4b74-b08b-37fbd7990e42.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PowerInfer_SmallThinker-3B-Preview/1762652579.814635", - "retrieved_timestamp": "1762652579.814636", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PowerInfer/SmallThinker-3B-Preview", - "developer": "PowerInfer", - "inference_platform": "unknown", - "id": "PowerInfer/SmallThinker-3B-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6199650261306666 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4494922016660919 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27794561933534745 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3524791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3017785904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PranavHarshan/PranavHarshan_MedNarra-X1/86023703-88e2-4219-b38b-4c871e2ee381.json b/leaderboard_data/HFOpenLLMv2/PranavHarshan/PranavHarshan_MedNarra-X1/86023703-88e2-4219-b38b-4c871e2ee381.json deleted file mode 100644 index eb55bd3699287f0f9c4121f7990258561865e7ad..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PranavHarshan/PranavHarshan_MedNarra-X1/86023703-88e2-4219-b38b-4c871e2ee381.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PranavHarshan_MedNarra-X1/1762652579.815135", - "retrieved_timestamp": "1762652579.815136", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PranavHarshan/MedNarra-X1", - "developer": "PranavHarshan", - "inference_platform": "unknown", - "id": "PranavHarshan/MedNarra-X1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43384331351924005 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46371668179774184 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34308510638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Appended/eca9180f-20d5-4bcd-9a74-e2f69c4ea4ad.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Appended/eca9180f-20d5-4bcd-9a74-e2f69c4ea4ad.json deleted file mode 100644 index 5768a1c660f423a084bc2dcd24f0d007428bc5ba..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Appended/eca9180f-20d5-4bcd-9a74-e2f69c4ea4ad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Appended/1762652579.815407", - "retrieved_timestamp": "1762652579.815407", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended", - "developer": "Pretergeek", - "inference_platform": "unknown", - "id": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5960595663949432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4619637884426022 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289561170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Interleaved/65d32305-4f23-4041-a107-8625822c1322.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Interleaved/65d32305-4f23-4041-a107-8625822c1322.json deleted file mode 100644 index 216cca5fee0e6e0cb43c4b6085f39253b900ebe9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Interleaved/65d32305-4f23-4041-a107-8625822c1322.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Interleaved/1762652579.81567", - "retrieved_timestamp": "1762652579.815671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved", - "developer": "Pretergeek", - "inference_platform": "unknown", - "id": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5960595663949432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4619637884426022 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3298703457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"MistralForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_32K-PoSE/195acbac-1db7-47ed-907f-98e312fc8921.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_32K-PoSE/195acbac-1db7-47ed-907f-98e312fc8921.json deleted file mode 100644 index 0a862b2d0dd0042e6893184157bb580510053095..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_32K-PoSE/195acbac-1db7-47ed-907f-98e312fc8921.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_32K-PoSE/1762652579.815889", - "retrieved_timestamp": "1762652579.8158898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_32K-PoSE", - "developer": "Pretergeek", - "inference_platform": "unknown", - "id": "Pretergeek/OpenChat-3.5-0106_32K-PoSE" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3968991165662664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3471309425137119 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42054166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.203125 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Appended/349bccfd-1816-4845-a1b9-2d9f4936adea.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Appended/349bccfd-1816-4845-a1b9-2d9f4936adea.json deleted file mode 100644 index 4fde5126f3534bb98e4051c5c5df9cd67484b7a5..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Appended/349bccfd-1816-4845-a1b9-2d9f4936adea.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Appended/1762652579.8160908", - "retrieved_timestamp": "1762652579.8160908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended", - "developer": "Pretergeek", - "inference_platform": "unknown", - "id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5975833011963811 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4619637884426022 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289561170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.114 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Interleaved/c2e26b8a-3a12-4cb8-888e-96affc8cbac9.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Interleaved/c2e26b8a-3a12-4cb8-888e-96affc8cbac9.json deleted file mode 100644 index 71666bfdd876c353b932ca60ed5fe0e0ebe2b5cf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Interleaved/c2e26b8a-3a12-4cb8-888e-96affc8cbac9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Interleaved/1762652579.8163", - "retrieved_timestamp": "1762652579.8163", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved", - "developer": "Pretergeek", - "inference_platform": "unknown", - "id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5960595663949432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46213045510926887 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42407291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3298703457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.114 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Appended/a70222dc-0589-4f09-ac8c-3ff4fa72328f.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Appended/a70222dc-0589-4f09-ac8c-3ff4fa72328f.json deleted file mode 100644 index bff4077605cdc0dfc5e05d40dbab3c4292c35253..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Appended/a70222dc-0589-4f09-ac8c-3ff4fa72328f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Appended/1762652579.81651", - "retrieved_timestamp": "1762652579.816511", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended", - "developer": "Pretergeek", - 
"inference_platform": "unknown", - "id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5960595663949432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4619637884426022 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289561170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.987 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Interleaved/19eb8f3a-ca9d-4da4-8e7e-96eebfd33576.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Interleaved/19eb8f3a-ca9d-4da4-8e7e-96eebfd33576.json deleted file mode 100644 index 28b86b5201c93aef4d63e94d29036ef57fa769eb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Interleaved/19eb8f3a-ca9d-4da4-8e7e-96eebfd33576.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Interleaved/1762652579.816719", - "retrieved_timestamp": "1762652579.816719", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved", - "developer": "Pretergeek", - "inference_platform": "unknown", - "id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5975833011963811 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46213045510926887 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42407291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3298703457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.987 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_9.86B_44Layers-Appended/e44eddb9-9764-4bc9-be85-ec7995846da0.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_9.86B_44Layers-Appended/e44eddb9-9764-4bc9-be85-ec7995846da0.json deleted file mode 100644 index c95aa6009361fa7d6ecb14cdb27ca0103d104556..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_9.86B_44Layers-Appended/e44eddb9-9764-4bc9-be85-ec7995846da0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_9.86B_44Layers-Appended/1762652579.816936", - "retrieved_timestamp": "1762652579.816937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended", - "developer": "Pretergeek", - "inference_platform": "unknown", - "id": "Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5960595663949432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4619637884426022 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289561170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 9.859 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1-Instruct/ea823c15-3c92-4a67-a4fd-7826a9dd9e41.json b/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1-Instruct/ea823c15-3c92-4a67-a4fd-7826a9dd9e41.json deleted file mode 100644 index 96d677d008e53429be110bc95c2d4ee8c6837991..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1-Instruct/ea823c15-3c92-4a67-a4fd-7826a9dd9e41.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PrimeIntellect_INTELLECT-1-Instruct/1762652579.817848", - "retrieved_timestamp": "1762652579.8178492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PrimeIntellect/INTELLECT-1-Instruct", - "developer": "PrimeIntellect", - "inference_platform": "unknown", - "id": "PrimeIntellect/INTELLECT-1-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28698007801214714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3576875 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10638297872340426 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.211 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1/bfffc240-22ab-4cc0-97c8-466ddf472ac4.json b/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1/bfffc240-22ab-4cc0-97c8-466ddf472ac4.json deleted file mode 100644 index 5bd185ec440f5514a0506ba65bacd5dc690a8c6f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1/bfffc240-22ab-4cc0-97c8-466ddf472ac4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PrimeIntellect_INTELLECT-1/1762652579.8176599", - "retrieved_timestamp": "1762652579.817661", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PrimeIntellect/INTELLECT-1", - "developer": "PrimeIntellect", - "inference_platform": "unknown", - "id": "PrimeIntellect/INTELLECT-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1757315035217667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27398007801214713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3752708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11203457446808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.211 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1/fee7966f-3e1b-43d9-b129-b0c23aac53b5.json 
b/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1/fee7966f-3e1b-43d9-b129-b0c23aac53b5.json deleted file mode 100644 index 8105f4c3a8dcc5b68186d69d85f417276429f1e3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1/fee7966f-3e1b-43d9-b129-b0c23aac53b5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PrimeIntellect_INTELLECT-1/1762652579.817406", - "retrieved_timestamp": "1762652579.817406", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PrimeIntellect/INTELLECT-1", - "developer": "PrimeIntellect", - "inference_platform": "unknown", - "id": "PrimeIntellect/INTELLECT-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1757315035217667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27598007801214713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3339375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.211 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PuxAI/PuxAI_LUA_model/05dc0500-be97-456f-9d12-12192626ea39.json b/leaderboard_data/HFOpenLLMv2/PuxAI/PuxAI_LUA_model/05dc0500-be97-456f-9d12-12192626ea39.json deleted file mode 100644 index 1b0deb710b2b0b1eb3cf432c296e98930f3bbdbd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PuxAI/PuxAI_LUA_model/05dc0500-be97-456f-9d12-12192626ea39.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PuxAI_LUA_model/1762652579.818059", - "retrieved_timestamp": "1762652579.818059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PuxAI/LUA_model", - "developer": "PuxAI", - "inference_platform": "unknown", - "id": "PuxAI/LUA_model" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22821336276634885 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2876778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34838541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.386 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/PygmalionAI/PygmalionAI_pygmalion-6b/7cdfef58-c871-4158-b97d-ed843f7d667b.json b/leaderboard_data/HFOpenLLMv2/PygmalionAI/PygmalionAI_pygmalion-6b/7cdfef58-c871-4158-b97d-ed843f7d667b.json deleted file mode 100644 index 5f1d57295d2f3fa91f70a24bf99a8ec062607e4f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/PygmalionAI/PygmalionAI_pygmalion-6b/7cdfef58-c871-4158-b97d-ed843f7d667b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PygmalionAI_pygmalion-6b/1762652579.818316", - "retrieved_timestamp": "1762652579.8183172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PygmalionAI/pygmalion-6b", - "developer": "PygmalionAI", - "inference_platform": "unknown", - "id": "PygmalionAI/pygmalion-6b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20910406610016974 - } - }, - 
{ - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31988944643860034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3683541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11835106382978723 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTJForCausalLM", - "params_billions": 6.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Q-bert/Q-bert_MetaMath-1B/713b1c64-9637-4d83-aee9-f81988fec0b5.json b/leaderboard_data/HFOpenLLMv2/Q-bert/Q-bert_MetaMath-1B/713b1c64-9637-4d83-aee9-f81988fec0b5.json deleted file mode 100644 index 730a14cc1f3214be4422af0ddb80fbfe1fb63980..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Q-bert/Q-bert_MetaMath-1B/713b1c64-9637-4d83-aee9-f81988fec0b5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Q-bert_MetaMath-1B/1762652579.8185658", - "retrieved_timestamp": "1762652579.8185658", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Q-bert/MetaMath-1B", - "developer": "Q-bert", - "inference_platform": "unknown", - "id": "Q-bert/MetaMath-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5300391849182392 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34506863677929517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1495179521276596 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_1up-14b/c315527d-ea14-42a8-a002-4bb67c085fc0.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_1up-14b/c315527d-ea14-42a8-a002-4bb67c085fc0.json deleted file mode 100644 index c1f8f18531c103ce6589cc5820c1275b1532c0d0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_1up-14b/c315527d-ea14-42a8-a002-4bb67c085fc0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_1up-14b/1762652579.818811", - "retrieved_timestamp": "1762652579.818812", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/1up-14b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/1up-14b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6888079185450161 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6920935635451656 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4161631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4583333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5406416223404256 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Adamant-14B-sce/7ed9dcc6-7915-4a7e-a190-07e067d2fd79.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Adamant-14B-sce/7ed9dcc6-7915-4a7e-a190-07e067d2fd79.json deleted file mode 100644 index 4d0f05d41f6bf0b4a69d5b5b2abbfa682da1d36f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Adamant-14B-sce/7ed9dcc6-7915-4a7e-a190-07e067d2fd79.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Adamant-14B-sce/1762652579.819103", - "retrieved_timestamp": "1762652579.819104", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Adamant-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Adamant-14B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6857604489421402 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6858943778247303 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45579166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5371509308510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Alice-14B/3dd99496-1274-439f-b7c2-1fd731745753.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Alice-14B/3dd99496-1274-439f-b7c2-1fd731745753.json deleted file mode 100644 index 5bedfd990cfaeeaa579164a129ffd31186732f3e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Alice-14B/3dd99496-1274-439f-b7c2-1fd731745753.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Alice-14B/1762652579.819317", - "retrieved_timestamp": "1762652579.819317", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Alice-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Alice-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6836371937570092 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6937748567349198 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4569486404833837 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44794791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418882978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Alien-CoT-14B-sce/dc89616f-c86d-41d0-9945-12703dc8f905.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Alien-CoT-14B-sce/dc89616f-c86d-41d0-9945-12703dc8f905.json deleted file mode 100644 index 6143cdc30052ac2ea82b7b6b7b9b7315bf10eb30..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Alien-CoT-14B-sce/dc89616f-c86d-41d0-9945-12703dc8f905.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Alien-CoT-14B-sce/1762652579.819517", - "retrieved_timestamp": "1762652579.8195179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Alien-CoT-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": 
"Quazim0t0/Alien-CoT-14B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07486358417886763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6395487523790632 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39177852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47852083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5170378989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Aura-8B-Linear/2d22ab53-547d-41bb-8700-12bc5b16c97d.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Aura-8B-Linear/2d22ab53-547d-41bb-8700-12bc5b16c97d.json deleted file mode 100644 index 807d541bc0f68c3133ae63abecfa0b70a71be730..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Aura-8B-Linear/2d22ab53-547d-41bb-8700-12bc5b16c97d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Aura-8B-Linear/1762652579.819725", - "retrieved_timestamp": "1762652579.819726", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Aura-8B-Linear", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Aura-8B-Linear" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.794770098893159 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5074298101934884 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800698138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Casa-14b-sce/09bbb732-62d8-4cec-972a-273b728df1f4.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Casa-14b-sce/09bbb732-62d8-4cec-972a-273b728df1f4.json deleted file mode 100644 index 026e7f57ab07aa7c8bc07c5a229a9f0d1aa5ef6c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Casa-14b-sce/09bbb732-62d8-4cec-972a-273b728df1f4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Casa-14b-sce/1762652579.8199282", - "retrieved_timestamp": "1762652579.8199282", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Casa-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Casa-14b-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6653523761397536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6901033460664828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4697885196374622 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43102083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5425531914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Casa-14b-sce/a0dde1eb-a763-4568-8122-1b280dedb2ce.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Casa-14b-sce/a0dde1eb-a763-4568-8122-1b280dedb2ce.json deleted file mode 100644 index 2ff50b9e9687edc77f883072407490c5e451cf16..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Casa-14b-sce/a0dde1eb-a763-4568-8122-1b280dedb2ce.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Casa-14b-sce/1762652579.820149", - "retrieved_timestamp": "1762652579.820149", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Casa-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Casa-14b-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6718218770639681 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6891400252742456 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4322916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5408078457446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Charlie-8B-Linear/c56d7463-dad2-4c9c-8823-a4b6faa5aeb9.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Charlie-8B-Linear/c56d7463-dad2-4c9c-8823-a4b6faa5aeb9.json deleted file mode 100644 index f509976d419b2932de870b8683a3efbeaaf6b711..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Charlie-8B-Linear/c56d7463-dad2-4c9c-8823-a4b6faa5aeb9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Charlie-8B-Linear/1762652579.820338", - "retrieved_timestamp": "1762652579.820339", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Charlie-8B-Linear", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Charlie-8B-Linear" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7380672172059026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141359215016831 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26510574018126887 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3485416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3572972074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Chromatic-8b-sce/f626897d-5003-40fa-8020-c100748a847f.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Chromatic-8b-sce/f626897d-5003-40fa-8020-c100748a847f.json deleted file mode 100644 index ea86c76f8e8dba0100a182117a3814a700a41984..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Chromatic-8b-sce/f626897d-5003-40fa-8020-c100748a847f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Chromatic-8b-sce/1762652579.8205519", - "retrieved_timestamp": 
"1762652579.820553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Chromatic-8b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Chromatic-8b-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5085074269604649 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5063171816307924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555891238670695 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.405125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37549867021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Dyson-14b/35c401bd-ed12-475e-afbc-e664243d90d5.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Dyson-14b/35c401bd-ed12-475e-afbc-e664243d90d5.json deleted file mode 100644 index 4a7a55aa4952a4f410c4014a6b38fda3132d2041..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Dyson-14b/35c401bd-ed12-475e-afbc-e664243d90d5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Dyson-14b/1762652579.821013", - "retrieved_timestamp": "1762652579.821014", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Dyson-14b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Dyson-14b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5856682491345186 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6862902828866305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5392749244712991 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4259375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5398936170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Edu-14B-Linear/a70e7642-3cc7-4719-bc22-68182baa3857.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Edu-14B-Linear/a70e7642-3cc7-4719-bc22-68182baa3857.json deleted file mode 100644 index 60152df1e31d07cf793cd25439653335a7a741a3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Edu-14B-Linear/a70e7642-3cc7-4719-bc22-68182baa3857.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Edu-14B-Linear/1762652579.821216", - "retrieved_timestamp": "1762652579.821216", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Edu-14B-Linear", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Edu-14B-Linear" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6158182511292261 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6757820996225599 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.24471299093655588 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43775000000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.508560505319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Fugazi14b/ee38e1c3-7a6b-4357-94ac-b309da33d14b.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Fugazi14b/ee38e1c3-7a6b-4357-94ac-b309da33d14b.json deleted file mode 100644 index 8bb386dd3ceeed7a128a26fca4c9e540b745d9d8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Fugazi14b/ee38e1c3-7a6b-4357-94ac-b309da33d14b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Fugazi14b/1762652579.8215911", - "retrieved_timestamp": "1762652579.821592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Fugazi14b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Fugazi14b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6997987561891337 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6941017680723065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45455208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5417220744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GZA-14B-sce/cfb61ec3-ab7e-4697-892e-a8dd62518f39.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GZA-14B-sce/cfb61ec3-ab7e-4697-892e-a8dd62518f39.json deleted file mode 100644 index d2dd0e58fb9b8648f87073b477b0582cfa4948d6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GZA-14B-sce/cfb61ec3-ab7e-4697-892e-a8dd62518f39.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_GZA-14B-sce/1762652579.821823", - "retrieved_timestamp": "1762652579.821824", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/GZA-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/GZA-14B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6274086091570367 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6686539892126272 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47205438066465255 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4284791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.523188164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Geedorah-14B/c4a79914-b049-436b-9de6-640cc3e119ee.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Geedorah-14B/c4a79914-b049-436b-9de6-640cc3e119ee.json deleted file mode 100644 index 
449538b4d8a8486debf963cfcee7508227e797af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Geedorah-14B/c4a79914-b049-436b-9de6-640cc3e119ee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Geedorah-14B/1762652579.822031", - "retrieved_timestamp": "1762652579.822032", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Geedorah-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Geedorah-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6872841837435781 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6964189914061528 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45467708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5421376329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GivingTree-8b-sce/9b753075-a150-4bc3-9425-2371010daf8b.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GivingTree-8b-sce/9b753075-a150-4bc3-9425-2371010daf8b.json deleted file mode 100644 index 4abeac26dd248fa890aa4bdd47bf5332b947c034..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GivingTree-8b-sce/9b753075-a150-4bc3-9425-2371010daf8b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_GivingTree-8b-sce/1762652579.8222332", - "retrieved_timestamp": "1762652579.8222342", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/GivingTree-8b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/GivingTree-8b-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5006139266036339 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5040482025572203 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15256797583081572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.405125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37608045212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GuiltySpark-14B-ties/2b50b73e-9734-4502-b088-8d4936291aaa.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GuiltySpark-14B-ties/2b50b73e-9734-4502-b088-8d4936291aaa.json deleted file mode 100644 index 7e6adf266d596baaf816a1e8f3e19c2ab5789ba7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GuiltySpark-14B-ties/2b50b73e-9734-4502-b088-8d4936291aaa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_GuiltySpark-14B-ties/1762652579.822431", - "retrieved_timestamp": "1762652579.822432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/GuiltySpark-14B-ties", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/GuiltySpark-14B-ties" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6854357549080883 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6914302574038697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38368580060422963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4557291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399767287234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Halo-14B-sce/156424f1-2a1e-4e61-b081-bb066ee3958d.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Halo-14B-sce/156424f1-2a1e-4e61-b081-bb066ee3958d.json deleted file mode 100644 index 709ddae210940ae4d89ac4b383d094eb0bfcd424..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Halo-14B-sce/156424f1-2a1e-4e61-b081-bb066ee3958d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Halo-14B-sce/1762652579.822633", - "retrieved_timestamp": "1762652579.822633", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Halo-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Halo-14B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6753691316817156 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6875692490185378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42900302114803623 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44007291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5376496010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Heretic1.5b/e3d7453d-0ba6-4980-be81-827122149bb6.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Heretic1.5b/e3d7453d-0ba6-4980-be81-827122149bb6.json deleted file mode 100644 index dbf0fe0b35dbb2cde1da02b6a1d84cb34b8cbdaf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Heretic1.5b/e3d7453d-0ba6-4980-be81-827122149bb6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Heretic1.5b/1762652579.8228369", - "retrieved_timestamp": "1762652579.8228369", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Heretic1.5b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Heretic1.5b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20615633186611523 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3529180801121154 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24395770392749244 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3511458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17278922872340424 
- } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.73 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Hyde-14b-sce/814ce716-6f61-4980-a8f6-7918c7b0eea5.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Hyde-14b-sce/814ce716-6f61-4980-a8f6-7918c7b0eea5.json deleted file mode 100644 index ecb6e8528e417a202526be4bd1d70648ca23ea07..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Hyde-14b-sce/814ce716-6f61-4980-a8f6-7918c7b0eea5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Hyde-14b-sce/1762652579.823039", - "retrieved_timestamp": "1762652579.823039", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Hyde-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Hyde-14b-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6715470507143269 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6885164810743584 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27341389728096677 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41409375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5300033244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Imagine-v0.5-16bit/ccb33ad4-98f5-4980-a442-1a1772fab792.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Imagine-v0.5-16bit/ccb33ad4-98f5-4980-a442-1a1772fab792.json deleted file mode 100644 index c29cc7b9a3833456bb92c73e615583263c517b0b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Imagine-v0.5-16bit/ccb33ad4-98f5-4980-a442-1a1772fab792.json +++ /dev/null @@ -1,107 
+0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Imagine-v0.5-16bit/1762652579.823242", - "retrieved_timestamp": "1762652579.823243", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Imagine-v0.5-16bit", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Imagine-v0.5-16bit" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2758990589413866 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6769135492947932 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43492708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.535405585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Imbue-14b/c50c07fc-b529-43c9-9f3d-0f1ff174b905.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Imbue-14b/c50c07fc-b529-43c9-9f3d-0f1ff174b905.json deleted file mode 100644 index 751705fffcacb5dc0aa751c99d07d3a4f180345f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Imbue-14b/c50c07fc-b529-43c9-9f3d-0f1ff174b905.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Imbue-14b/1762652579.8234398", - "retrieved_timestamp": "1762652579.8234408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Imbue-14b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": 
"Quazim0t0/Imbue-14b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5199725616918665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6845292092854045 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41672916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5402260638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Insom/51f419c6-1107-41c9-896b-fadbbde4f5e9.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Insom/51f419c6-1107-41c9-896b-fadbbde4f5e9.json deleted file mode 100644 index b688722143ca5ad906cdc3d6c2f907c8445a8f31..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Insom/51f419c6-1107-41c9-896b-fadbbde4f5e9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Insom/1762652579.823634", - "retrieved_timestamp": "1762652579.8236349", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Insom", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Insom" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.68183863260593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6881456689046391 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH 
Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3498322147651007 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43114583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5352393617021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_InspectorDeck-14B-sce/1ac547e3-1b29-462a-aa08-1e9ef9e3f409.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_InspectorDeck-14B-sce/1ac547e3-1b29-462a-aa08-1e9ef9e3f409.json deleted file mode 100644 index a04e078539d528fc5096baff79d773e84071ed8f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_InspectorDeck-14B-sce/1ac547e3-1b29-462a-aa08-1e9ef9e3f409.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_InspectorDeck-14B-sce/1762652579.8238342", - "retrieved_timestamp": "1762652579.8238342", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/InspectorDeck-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/InspectorDeck-14B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32408454013129606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6668480318764974 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3164652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39815625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5260970744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Jekyl-8b-sce/dc6a9e35-c130-4edc-93bc-5f0b6ac0e05d.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Jekyl-8b-sce/dc6a9e35-c130-4edc-93bc-5f0b6ac0e05d.json deleted file mode 100644 index 87a93a91778824509d26cadee6f3b3db28434ede..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Jekyl-8b-sce/dc6a9e35-c130-4edc-93bc-5f0b6ac0e05d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Jekyl-8b-sce/1762652579.82404", - "retrieved_timestamp": "1762652579.824041", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Jekyl-8b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Jekyl-8b-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46968931324441365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4993588236391566 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16163141993957703 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41966666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686003989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Jigsaw-14B-Linear/7533defe-b19d-4571-a403-c443ec03a31b.json 
b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Jigsaw-14B-Linear/7533defe-b19d-4571-a403-c443ec03a31b.json deleted file mode 100644 index 9a646a47b9407ddaa1643bfc72f4e9ab5f088a9e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Jigsaw-14B-Linear/7533defe-b19d-4571-a403-c443ec03a31b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Jigsaw-14B-Linear/1762652579.824291", - "retrieved_timestamp": "1762652579.824291", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Jigsaw-14B-Linear", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Jigsaw-14B-Linear" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6480416406246536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6864625931836906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26510574018126887 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44826041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5233543882978723 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Katana-8b-sce/dea8c833-7deb-43f8-9b15-acbadf4fc749.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Katana-8b-sce/dea8c833-7deb-43f8-9b15-acbadf4fc749.json deleted file mode 100644 index c4a541ffc947e70cdfcf43c73290678eb0f7c8f8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Katana-8b-sce/dea8c833-7deb-43f8-9b15-acbadf4fc749.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Katana-8b-sce/1762652579.8246028", - "retrieved_timestamp": "1762652579.8246038", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Katana-8b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Katana-8b-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5107304175144174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5074684221457483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1510574018126888 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4037604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3770777925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Knot-CoT-14B-sce/fe0b75bf-2035-4ffe-8cbf-d5f4c66907aa.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Knot-CoT-14B-sce/fe0b75bf-2035-4ffe-8cbf-d5f4c66907aa.json deleted file mode 100644 index 17bf6a2c10020469573030e5da45bfa503802861..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Knot-CoT-14B-sce/fe0b75bf-2035-4ffe-8cbf-d5f4c66907aa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Knot-CoT-14B-sce/1762652579.8248682", - "retrieved_timestamp": "1762652579.8248692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Knot-CoT-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Knot-CoT-14B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4831779677921249 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6615610657544672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3995468277945619 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.515375664893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Lineage-14B/37f890b7-5487-46ea-b61e-d91b5349d078.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Lineage-14B/37f890b7-5487-46ea-b61e-d91b5349d078.json deleted file mode 100644 index aaa0f4b34fe7ad7b96e6ad90e57426fca254dd8f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Lineage-14B/37f890b7-5487-46ea-b61e-d91b5349d078.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Lineage-14B/1762652579.82509", - "retrieved_timestamp": "1762652579.8250911", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Lineage-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Lineage-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7070428684778609 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6933789516730196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4244712990936556 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3598993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4597291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5410571808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Loke-14B-sce/cfac443e-5c66-45e3-bf7a-7c596d01d4ff.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Loke-14B-sce/cfac443e-5c66-45e3-bf7a-7c596d01d4ff.json deleted file mode 100644 index 503da1ce691cdfcf5417ea36f67d1f29a8a25aa1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Loke-14B-sce/cfac443e-5c66-45e3-bf7a-7c596d01d4ff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Loke-14B-sce/1762652579.825529", - "retrieved_timestamp": "1762652579.82553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Loke-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Loke-14B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6847863668399845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6923902176707362 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3904833836858006 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46366666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5401429521276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_MFDOOM-14B/3efa12a5-4525-4ee9-80bd-99c4b8d2ccb2.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_MFDOOM-14B/3efa12a5-4525-4ee9-80bd-99c4b8d2ccb2.json deleted file mode 100644 index 901ed05e8129fe82847dc2b1f2967071515ffccf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_MFDOOM-14B/3efa12a5-4525-4ee9-80bd-99c4b8d2ccb2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_MFDOOM-14B/1762652579.825741", - "retrieved_timestamp": "1762652579.825742", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/MFDOOM-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/MFDOOM-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6736204382150472 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6916400252742457 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5264350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43765625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5425531914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_MFGRIMM-14B/773228d8-7e03-4ba8-87c1-f59ac5aad425.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_MFGRIMM-14B/773228d8-7e03-4ba8-87c1-f59ac5aad425.json deleted file mode 100644 index 
f7e9cc965ae8af6fc5fec7b0533a4b1c34af9ba1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_MFGRIMM-14B/773228d8-7e03-4ba8-87c1-f59ac5aad425.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_MFGRIMM-14B/1762652579.8259468", - "retrieved_timestamp": "1762652579.825948", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/MFGRIMM-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/MFGRIMM-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6894074389287091 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.69087746819662 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5060422960725075 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43613541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5416389627659575 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mithril-14B-sce/8ab4e441-2efb-4510-87ea-43f3fbcc67ac.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mithril-14B-sce/8ab4e441-2efb-4510-87ea-43f3fbcc67ac.json deleted file mode 100644 index 157f65994d4a16f9e358e676e51d352f8a382518..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mithril-14B-sce/8ab4e441-2efb-4510-87ea-43f3fbcc67ac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Mithril-14B-sce/1762652579.826359", - "retrieved_timestamp": "1762652579.82636", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Mithril-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Mithril-14B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6957772044841022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6925969240705362 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3821752265861027 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4610625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5403091755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mononoke-14B-sce/6f2d122b-f7fe-448a-ac8b-864314e94692.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mononoke-14B-sce/6f2d122b-f7fe-448a-ac8b-864314e94692.json deleted file mode 100644 index 5f2beee581615a9fd7c7dc5f6921084aa996d2f1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mononoke-14B-sce/6f2d122b-f7fe-448a-ac8b-864314e94692.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Mononoke-14B-sce/1762652579.8265631", - "retrieved_timestamp": "1762652579.826564", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Mononoke-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Mononoke-14B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3502129904209719 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6744431226588331 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4697885196374622 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4154583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5297539893617021 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Motion-8B-Linear/db82138b-f915-4451-aa85-8bc4c7fdd225.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Motion-8B-Linear/db82138b-f915-4451-aa85-8bc4c7fdd225.json deleted file mode 100644 index f5ed2ea441b7e07fbbd1774310e2e165753158ee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Motion-8B-Linear/db82138b-f915-4451-aa85-8bc4c7fdd225.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Motion-8B-Linear/1762652579.826771", - "retrieved_timestamp": "1762652579.826771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Motion-8B-Linear", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Motion-8B-Linear" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7685917809190725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084252652465131 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36060416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3784906914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mouse-9B/70e3145f-d67b-403d-af2a-1b06b2ba0f24.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mouse-9B/70e3145f-d67b-403d-af2a-1b06b2ba0f24.json deleted file mode 100644 index 7040b36539124cbf3887936d4c6c8060141ea6f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mouse-9B/70e3145f-d67b-403d-af2a-1b06b2ba0f24.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Mouse-9B/1762652579.826978", - "retrieved_timestamp": "1762652579.826978", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Mouse-9B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Mouse-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1324917884546337 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29789470527601253 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3469583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11386303191489362 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 9.207 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Nova-14b-sce/3336c8fa-fcef-4513-946d-9254f537e418.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Nova-14b-sce/3336c8fa-fcef-4513-946d-9254f537e418.json deleted file mode 100644 index 70994c6afc6c49025b8c2d68ca3f7810309ef423..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Nova-14b-sce/3336c8fa-fcef-4513-946d-9254f537e418.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Nova-14b-sce/1762652579.827177", - "retrieved_timestamp": "1762652579.827178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Nova-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Nova-14b-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7021968377239058 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6935261478148286 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4161631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4570625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5413065159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_NovaScotia-14b-stock/8ab3ce59-d0cd-4764-98c7-c4df81bc3c23.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_NovaScotia-14b-stock/8ab3ce59-d0cd-4764-98c7-c4df81bc3c23.json deleted file mode 100644 index 8cf9a071a7b262416d716149dc3a80476e2eb4d6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_NovaScotia-14b-stock/8ab3ce59-d0cd-4764-98c7-c4df81bc3c23.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_NovaScotia-14b-stock/1762652579.827381", - "retrieved_timestamp": "1762652579.827381", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/NovaScotia-14b-stock", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/NovaScotia-14b-stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6787412953186434 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6935261478148286 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44934375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5408909574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_ODB-14B-sce/66743ed1-93ab-41f7-9002-0080e7f74722.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_ODB-14B-sce/66743ed1-93ab-41f7-9002-0080e7f74722.json deleted file mode 100644 index f6982052b06ee0aa46b7804bd07e41de3a59e346..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_ODB-14B-sce/66743ed1-93ab-41f7-9002-0080e7f74722.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_ODB-14b-sce/1762652579.827807", - "retrieved_timestamp": "1762652579.827808", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/ODB-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/ODB-14b-sce" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7015973173402128 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6941928144814953 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.411631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4570625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5411402925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_ODB-14B-sce/79d7d2a1-dcb6-40a7-b29c-7213ebd261df.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_ODB-14B-sce/79d7d2a1-dcb6-40a7-b29c-7213ebd261df.json deleted file mode 100644 index c5d36571c9a75a80aefa526847fca0dc8aacefaf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_ODB-14B-sce/79d7d2a1-dcb6-40a7-b29c-7213ebd261df.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_ODB-14B-sce/1762652579.827594", - "retrieved_timestamp": "1762652579.827595", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/ODB-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/ODB-14B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.292235712354331 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6558922017209644 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2545317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39288541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206948138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Unknown", - "params_billions": 0.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Oasis-14B-ties/a3ef4bc2-c560-4a62-8227-2bd30120b537.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Oasis-14B-ties/a3ef4bc2-c560-4a62-8227-2bd30120b537.json deleted file mode 100644 index 9a3a7a793c44b132d348b26dc375606125a02e73..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Oasis-14B-ties/a3ef4bc2-c560-4a62-8227-2bd30120b537.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Oasis-14B-ties/1762652579.827992", - "retrieved_timestamp": "1762652579.8279932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Oasis-14B-ties", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Oasis-14B-ties" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6936539492989712 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6914976731342066 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.4570625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5404753989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Origami-14B-sce/82826944-e4a1-47bd-b240-c70e21acfc51.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Origami-14B-sce/82826944-e4a1-47bd-b240-c70e21acfc51.json deleted file mode 100644 index 1c209a31b1da2cc504934cf58e4017e716e4940c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Origami-14B-sce/82826944-e4a1-47bd-b240-c70e21acfc51.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Origami-14B-sce/1762652579.828193", - "retrieved_timestamp": "1762652579.8281941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Origami-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Origami-14B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3259329689667859 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6620277470720752 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29154078549848944 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40348958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244348404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Ponder-14B-linear/30942374-a112-4035-a4f2-e30bff57f9ce.json 
b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Ponder-14B-linear/30942374-a112-4035-a4f2-e30bff57f9ce.json deleted file mode 100644 index a76444f99aca596527dc0e8156d61b911fe6316f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Ponder-14B-linear/30942374-a112-4035-a4f2-e30bff57f9ce.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Ponder-14B-linear/1762652579.8290088", - "retrieved_timestamp": "1762652579.8290088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Ponder-14B-linear", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Ponder-14B-linear" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6906064796960952 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6942602302118323 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45576041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5408078457446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_RZA-14B-sce/e8a8cf1f-5bcf-45ae-b590-fb04de06b77f.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_RZA-14B-sce/e8a8cf1f-5bcf-45ae-b590-fb04de06b77f.json deleted file mode 100644 index 962a40719ddff9d765791a4eee5ca8718a76f9b7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_RZA-14B-sce/e8a8cf1f-5bcf-45ae-b590-fb04de06b77f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_RZA-14B-sce/1762652579.829216", - "retrieved_timestamp": "1762652579.829216", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/RZA-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/RZA-14B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4773578549360142 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6685829139021245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41133333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.538314494680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Rosemary-14b/84018db9-2b85-4b6f-beff-b4930b230399.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Rosemary-14b/84018db9-2b85-4b6f-beff-b4930b230399.json deleted file mode 100644 index f438e496fec9a7a6779d34038db8d6c676b564ba..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Rosemary-14b/84018db9-2b85-4b6f-beff-b4930b230399.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Rosemary-14b/1762652579.829469", - "retrieved_timestamp": "1762652579.82947", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Rosemary-14b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Rosemary-14b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6915306941138402 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6955261478148286 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.438821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44921875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5396442819148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Rune-14b/3ed52eaf-6b73-46ab-8ae7-3afe120fe437.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Rune-14b/3ed52eaf-6b73-46ab-8ae7-3afe120fe437.json deleted file mode 100644 index 4817ea5e2142106888a9acabc977a2e2d6def515..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Rune-14b/3ed52eaf-6b73-46ab-8ae7-3afe120fe437.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Rune-14b/1762652579.829681", - "retrieved_timestamp": "1762652579.8296819", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Rune-14b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Rune-14b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7015973173402128 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6937489642141156 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45845921450151056 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45328125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5411402925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_SZA-14B-sce/6d983237-925e-4197-a592-17cca9219bda.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_SZA-14B-sce/6d983237-925e-4197-a592-17cca9219bda.json deleted file mode 100644 index bf5fc724580c6935f3d9cb7a04be35841b38ee3a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_SZA-14B-sce/6d983237-925e-4197-a592-17cca9219bda.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_SZA-14B-sce/1762652579.829889", - "retrieved_timestamp": "1762652579.82989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/SZA-14B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/SZA-14B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5659095644002359 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6888749072998727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241691842900302 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5353224734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Sake-20b/25a672ed-3e0e-416f-abf4-a935e63171c6.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Sake-20b/25a672ed-3e0e-416f-abf4-a935e63171c6.json deleted file mode 100644 index 12ec012b94fdd6bf6faeb7c82fe0e755120519db..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Sake-20b/25a672ed-3e0e-416f-abf4-a935e63171c6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Sake-20b/1762652579.830092", - "retrieved_timestamp": "1762652579.8300931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Sake-20b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Sake-20b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6692741924759638 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6769823539837527 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44940625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5391456117021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.475 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Spok-14b-sce/9f15293c-5668-4895-b4d0-4062cac344e7.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Spok-14b-sce/9f15293c-5668-4895-b4d0-4062cac344e7.json deleted file mode 100644 index 5fdb8fc043440d99dfc9c32d3a15b951155b97a6..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Spok-14b-sce/9f15293c-5668-4895-b4d0-4062cac344e7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Spok-14b-sce/1762652579.830291", - "retrieved_timestamp": "1762652579.830292", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Spok-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Spok-14b-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6681748870773991 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6899172301380289 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2719033232628399 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41409375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5297539893617021 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Sumatra-20b/ae69fb3f-19a1-4b00-9309-8685e107aeba.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Sumatra-20b/ae69fb3f-19a1-4b00-9309-8685e107aeba.json deleted file mode 100644 index 51311249502ec822e7454d00b979ad0297bcc7b3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Sumatra-20b/ae69fb3f-19a1-4b00-9309-8685e107aeba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Sumatra-20b/1762652579.830487", - "retrieved_timestamp": "1762652579.830488", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - 
"name": "Quazim0t0/Sumatra-20b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Sumatra-20b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.673795529195867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6855416597047258 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4560104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414727393617021 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.475 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_SuperNova14b/b0659361-fb53-40db-81a7-2a72771bbd1a.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_SuperNova14b/b0659361-fb53-40db-81a7-2a72771bbd1a.json deleted file mode 100644 index e6ceb3c3bae9396c6dfc64b55f862914b547d123..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_SuperNova14b/b0659361-fb53-40db-81a7-2a72771bbd1a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_SuperNova14b/1762652579.830682", - "retrieved_timestamp": "1762652579.830683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/SuperNova14b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/SuperNova14b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.707642388861554 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.6937489642141156 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395770392749245 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4545208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.543467420212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_TB0-8B-sce/8f0da98a-cf9f-4cbb-8d4a-8c12d737580c.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_TB0-8B-sce/8f0da98a-cf9f-4cbb-8d4a-8c12d737580c.json deleted file mode 100644 index ac3d8cee1987b81c8adedab4f468624473c05d0d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_TB0-8B-sce/8f0da98a-cf9f-4cbb-8d4a-8c12d737580c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_TB0-8B-sce/1762652579.8308768", - "retrieved_timestamp": "1762652579.8308768", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/TB0-8B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/TB0-8B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5107304175144174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5074684221457483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1510574018126888 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4037604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3770777925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_TBL-8B-sce/4bff88c0-89fb-4d07-a83d-251c7aaeace4.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_TBL-8B-sce/4bff88c0-89fb-4d07-a83d-251c7aaeace4.json deleted file mode 100644 index 5ed9a174b5c48801f671a4c0b7e9906a2bf585b6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_TBL-8B-sce/4bff88c0-89fb-4d07-a83d-251c7aaeace4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_TBL-8B-sce/1762652579.831074", - "retrieved_timestamp": "1762652579.831075", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/TBL-8B-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/TBL-8B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45809895521660304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5008187839060233 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42363541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3689328457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Venti-20b/2b97259b-d7a5-4934-b350-7b1322964899.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Venti-20b/2b97259b-d7a5-4934-b350-7b1322964899.json deleted file mode 100644 index 0029072e0567b9cff9caccdc8267b43c8a9ac52a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Venti-20b/2b97259b-d7a5-4934-b350-7b1322964899.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Venti-20b/1762652579.8314738", - "retrieved_timestamp": "1762652579.831475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Venti-20b", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Venti-20b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6641034676879568 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6901240010129452 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3391238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44797916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5386469414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.475 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Venti-Blend-sce/e9fa96ff-d790-4948-9071-dd1376701fc1.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Venti-Blend-sce/e9fa96ff-d790-4948-9071-dd1376701fc1.json deleted file mode 100644 index 9130e62f42066233160ae42e18e0784aba7c16b5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Venti-Blend-sce/e9fa96ff-d790-4948-9071-dd1376701fc1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Venti-Blend-sce/1762652579.831816", - "retrieved_timestamp": "1762652579.8318179", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Venti-Blend-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Venti-Blend-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6879335718116819 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6842921511560114 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40558912386706947 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43892708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5413896276595744 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.475 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Vine-14b-sce/70d25d8c-96e9-45e4-b0d1-684a89278064.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Vine-14b-sce/70d25d8c-96e9-45e4-b0d1-684a89278064.json deleted file mode 100644 index 05a5f424cf8368d042a81cf607e8e295eb8be151..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Vine-14b-sce/70d25d8c-96e9-45e4-b0d1-684a89278064.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Vine-14b-sce/1762652579.8321972", - "retrieved_timestamp": "1762652579.832198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Vine-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Vine-14b-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.673345611865406 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6891400252742456 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5007552870090635 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4322916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5408078457446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Wendy-14B/13e6cad7-a063-4530-bec9-e70e4e98ccc0.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Wendy-14B/13e6cad7-a063-4530-bec9-e70e4e98ccc0.json deleted file mode 100644 index f398b89d867ed4b8df644b97812fc2433934e972..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Wendy-14B/13e6cad7-a063-4530-bec9-e70e4e98ccc0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Wendy-14B/1762652579.832468", - "retrieved_timestamp": "1762652579.832469", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Wendy-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Wendy-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6772175605172055 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6957587467354328 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48338368580060426 
- } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4428020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.543467420212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Wu-14b-sce/35443539-9756-466b-a36f-66adc5f68ddb.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Wu-14b-sce/35443539-9756-466b-a36f-66adc5f68ddb.json deleted file mode 100644 index d0bdfb6e1044fc6046f7745bab2c46e9e72b3b5c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Wu-14b-sce/35443539-9756-466b-a36f-66adc5f68ddb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Wu-14b-sce/1762652579.832721", - "retrieved_timestamp": "1762652579.832722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Wu-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/Wu-14b-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6718218770639681 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6885164810743585 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26132930513595165 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41142708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5292553191489362 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_bloom-14b-stock/1a2b4a76-0feb-4404-a1ef-0408c75f2ca7.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_bloom-14b-stock/1a2b4a76-0feb-4404-a1ef-0408c75f2ca7.json deleted file mode 100644 index 4e3c94ba505091edf2489c0f203a61241fe6709e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_bloom-14b-stock/1a2b4a76-0feb-4404-a1ef-0408c75f2ca7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_bloom-14b-stock/1762652579.8329449", - "retrieved_timestamp": "1762652579.8329458", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/bloom-14b-stock", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/bloom-14b-stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6575087434673332 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6877869223612597 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4811178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43095833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5373171542553191 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_caramel-14B/a9d4b6a9-33af-42a3-be29-d3214a171433.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_caramel-14B/a9d4b6a9-33af-42a3-be29-d3214a171433.json deleted file mode 100644 index 
595e209ea28dc23655a34ee65c37628e12190836..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_caramel-14B/a9d4b6a9-33af-42a3-be29-d3214a171433.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_caramel-14B/1762652579.833162", - "retrieved_timestamp": "1762652579.833163", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/caramel-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/caramel-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6744947849483814 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6918707471458787 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47129909365558914 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5435505319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_mocha-14B/5c04fa63-11be-42d8-8133-4e79e08e42ad.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_mocha-14B/5c04fa63-11be-42d8-8133-4e79e08e42ad.json deleted file mode 100644 index 07b3aaec1439836861735e8d302f30287e70c3a0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_mocha-14B/5c04fa63-11be-42d8-8133-4e79e08e42ad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_mocha-14B/1762652579.833622", - "retrieved_timestamp": "1762652579.833623", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging 
Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/mocha-14B", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/mocha-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5893152391210876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6894730595527842 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5264350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4271770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383976063829787 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_mosaic-14b-sce/4fd82b3e-4b13-4e21-9253-6492f8b1feaa.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_mosaic-14b-sce/4fd82b3e-4b13-4e21-9253-6492f8b1feaa.json deleted file mode 100644 index f92e34bff3c376bf1151794f339ac0d63a666a56..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_mosaic-14b-sce/4fd82b3e-4b13-4e21-9253-6492f8b1feaa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_mosaic-14b-sce/1762652579.8338351", - "retrieved_timestamp": "1762652579.833836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/mosaic-14b-sce", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/mosaic-14b-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6875590100932193 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6907089244809823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4025679758308157 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45579166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5396442819148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_tesseract-14b-stock/4311b63a-282b-4c16-8609-a1d4ab93ace9.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_tesseract-14b-stock/4311b63a-282b-4c16-8609-a1d4ab93ace9.json deleted file mode 100644 index 27e67e8ba2aea9284e5afa387a279db76d629dd4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_tesseract-14b-stock/4311b63a-282b-4c16-8609-a1d4ab93ace9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_tesseract-14b-stock/1762652579.834054", - "retrieved_timestamp": "1762652579.834055", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/tesseract-14b-stock", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/tesseract-14b-stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5847939024011845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6880007346047826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5143504531722054 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42323958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5388962765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_time-14b-stock/2755da2c-8347-4bbd-80ee-c58e77a26f5e.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_time-14b-stock/2755da2c-8347-4bbd-80ee-c58e77a26f5e.json deleted file mode 100644 index 21641aeda8c02647e7f50ebba11b844d33102483..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_time-14b-stock/2755da2c-8347-4bbd-80ee-c58e77a26f5e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_time-14b-stock/1762652579.834393", - "retrieved_timestamp": "1762652579.8343942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/time-14b-stock", - "developer": "Quazim0t0", - "inference_platform": "unknown", - "id": "Quazim0t0/time-14b-stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6699235805440675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6897025970028126 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5083081570996979 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43232291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418882978723404 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-0.5B-Chat/96baee1a-7ea7-454f-ac8b-fe1bead3cd93.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-0.5B-Chat/96baee1a-7ea7-454f-ac8b-fe1bead3cd93.json deleted file mode 100644 index a76aa8368083ece9b4bf4beb08b16368517f97fb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-0.5B-Chat/96baee1a-7ea7-454f-ac8b-fe1bead3cd93.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-0.5B-Chat/1762652579.835679", - "retrieved_timestamp": "1762652579.83568", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-0.5B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-0.5B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18072713732895385 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3166662152036714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3837083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12125997340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.62 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-1.8B-Chat/d6107bde-875e-40f6-8471-3a3507758910.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-1.8B-Chat/d6107bde-875e-40f6-8471-3a3507758910.json deleted file mode 100644 index 14f97dd401893e9af603b3316500c913a836b754..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-1.8B-Chat/d6107bde-875e-40f6-8471-3a3507758910.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Qwen_Qwen1.5-1.8B-Chat/1762652579.836214", - "retrieved_timestamp": "1762652579.836215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-1.8B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-1.8B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20190982149585324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3255912875735599 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42596875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18035239361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.837 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-110B-Chat/7cfcae3d-b623-4cf0-9ac8-529db46d05e6.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-110B-Chat/7cfcae3d-b623-4cf0-9ac8-529db46d05e6.json deleted file mode 100644 index 5e6f09846542b028c666e5bc513061bc4d84df18..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-110B-Chat/7cfcae3d-b623-4cf0-9ac8-529db46d05e6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-110B-Chat/1762652579.836649", - "retrieved_timestamp": "1762652579.836649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-110B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-110B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5938864435254014 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6183800385588633 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45216666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48246343085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 111.21 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-14B-Chat/e2cdcc99-a1b6-43ee-9cda-2e7ccbd0ad8d.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-14B-Chat/e2cdcc99-a1b6-43ee-9cda-2e7ccbd0ad8d.json deleted file mode 100644 index 9d8b6c0c93ba01f9536e7fb05042327d34de633b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-14B-Chat/e2cdcc99-a1b6-43ee-9cda-2e7ccbd0ad8d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-14B-Chat/1762652579.837058", - "retrieved_timestamp": "1762652579.837059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-14B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-14B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47680820223673187 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5228587510703555 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15256797583081572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43997916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36178523936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.167 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-32B-Chat/c14a0d32-1d27-4596-90d4-10a793aef9a2.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-32B-Chat/c14a0d32-1d27-4596-90d4-10a793aef9a2.json deleted file mode 100644 index e31bf21b1cabcc206fcae24d9645e68396a9c7e7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-32B-Chat/c14a0d32-1d27-4596-90d4-10a793aef9a2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-32B-Chat/1762652579.8374798", - "retrieved_timestamp": "1762652579.8374798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-32B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-32B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5532199009738605 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6066899757930234 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4159791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4457280585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.512 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-4B-Chat/e3417d3e-7883-45a7-a631-9e5d105788c4.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-4B-Chat/e3417d3e-7883-45a7-a631-9e5d105788c4.json deleted file mode 100644 index fb295ebc703f8e6ac838d2b850ce9588f53b429c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-4B-Chat/e3417d3e-7883-45a7-a631-9e5d105788c4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-4B-Chat/1762652579.837912", - "retrieved_timestamp": "1762652579.837912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-4B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-4B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31566576683200576 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40055485611486114 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39778125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23961103723404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.95 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-7B-Chat/42e3c9e4-bf1a-43ae-87e7-056f735abe03.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-7B-Chat/42e3c9e4-bf1a-43ae-87e7-056f735abe03.json deleted file mode 100644 index 
eacf6abade4df6437bf927b7ae225dd6b667ed22..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-7B-Chat/42e3c9e4-bf1a-43ae-87e7-056f735abe03.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-7B-Chat/1762652579.838321", - "retrieved_timestamp": "1762652579.838322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-7B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-7B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43711574178734647 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4510053116521351 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37790624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2951296542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.721 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-MoE-A2.7B-Chat/daec0873-964e-459e-a1a1-49da96cd17cf.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-MoE-A2.7B-Chat/daec0873-964e-459e-a1a1-49da96cd17cf.json deleted file mode 100644 index 49af3c35cadbf31fa060c232173c65779412479a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-MoE-A2.7B-Chat/daec0873-964e-459e-a1a1-49da96cd17cf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-MoE-A2.7B-Chat/1762652579.838758", - "retrieved_timestamp": "1762652579.838758", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-MoE-A2.7B-Chat", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-MoE-A2.7B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37953851336675576 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4272088620635824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38987499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29230385638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 14.316 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-0.5B-Instruct/6986e9f0-d008-4418-b3cb-1e870cf57e02.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-0.5B-Instruct/6986e9f0-d008-4418-b3cb-1e870cf57e02.json deleted file mode 100644 index 1bfcb1fac7cbd4d9fc96be8ef86c4b2a2030899f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-0.5B-Instruct/6986e9f0-d008-4418-b3cb-1e870cf57e02.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-0.5B-Instruct/1762652579.839177", - "retrieved_timestamp": "1762652579.839178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2-0.5B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-0.5B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22466610814860127 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31725179384863494 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33527083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15309175531914893 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-1.5B-Instruct/984029c7-f957-4555-8460-dfecd99f44a1.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-1.5B-Instruct/984029c7-f957-4555-8460-dfecd99f44a1.json deleted file mode 100644 index 147eebc5c4bc09b45b7706dbeea1420346d61b02..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-1.5B-Instruct/984029c7-f957-4555-8460-dfecd99f44a1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-1.5B-Instruct/1762652579.839607", - "retrieved_timestamp": "1762652579.839607", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2-1.5B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-1.5B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3371232773485463 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3852232408376059 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42928125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25008311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-57B-A14B-Instruct/50496313-dc6c-4456-8a8c-15cd8ddbb480.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-57B-A14B-Instruct/50496313-dc6c-4456-8a8c-15cd8ddbb480.json deleted file mode 100644 index c695fade272b6b3e7ef5067a2494567c970ed9b0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-57B-A14B-Instruct/50496313-dc6c-4456-8a8c-15cd8ddbb480.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-57B-A14B-Instruct/1762652579.84003", - "retrieved_timestamp": "1762652579.840031", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2-57B-A14B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-57B-A14B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6337783747124297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5887606963532052 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28172205438066467 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43613541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45752992021276595 - } - } - ], - "additional_details": { - "precision": 
"bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 57.409 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-72B-Instruct/d9ae7c35-ac71-4703-9cfe-bf5fb5aa688e.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-72B-Instruct/d9ae7c35-ac71-4703-9cfe-bf5fb5aa688e.json deleted file mode 100644 index 4cfc17031f8898f9561764c960aa64a9379cd2df..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-72B-Instruct/d9ae7c35-ac71-4703-9cfe-bf5fb5aa688e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-72B-Instruct/1762652579.840446", - "retrieved_timestamp": "1762652579.840447", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2-72B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-72B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7989168738945996 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.697730968386067 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4176737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724832214765101 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4560104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5403091755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-7B-Instruct/3e1ebb01-6fbb-498c-af58-022f50247ec9.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-7B-Instruct/3e1ebb01-6fbb-498c-af58-022f50247ec9.json deleted file mode 100644 index ed17e358a73d4557d460e09982adcebca06db773..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-7B-Instruct/3e1ebb01-6fbb-498c-af58-022f50247ec9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Qwen_Qwen2-7B-Instruct/1762652579.84092", - "retrieved_timestamp": "1762652579.84092", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2-7B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5679075962889577 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5544781563793189 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38472406914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-Math-72B-Instruct/1c7bb42e-aa1c-4522-a4b0-bcc460876125.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-Math-72B-Instruct/1c7bb42e-aa1c-4522-a4b0-bcc460876125.json deleted file mode 100644 index 5bbbd45f2a43d27dc28d36eb71eab283da6822ca..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-Math-72B-Instruct/1c7bb42e-aa1c-4522-a4b0-bcc460876125.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-Math-72B-Instruct/1762652579.841145", - "retrieved_timestamp": "1762652579.8411462", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2-Math-72B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-Math-72B-Instruct" - }, - "evaluation_results": [ - { 
- "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.569381463405985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.634337660025181 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5536253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45169791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42727726063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-VL-72B-Instruct/2f749e28-b845-45ab-a628-8f9b6a9029d9.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-VL-72B-Instruct/2f749e28-b845-45ab-a628-8f9b6a9029d9.json deleted file mode 100644 index e4648e325dca335fc662d9f14b067c787f2b1888..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-VL-72B-Instruct/2f749e28-b845-45ab-a628-8f9b6a9029d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-VL-72B-Instruct/1762652579.841569", - "retrieved_timestamp": "1762652579.8415701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2-VL-72B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-VL-72B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5982326892644849 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6946287292338682 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34441087613293053 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44921875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5717253989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2VLForConditionalGeneration", - "params_billions": 73.406 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-VL-7B-Instruct/6dd0eebe-ef61-431d-bf7c-c170475bed5f.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-VL-7B-Instruct/6dd0eebe-ef61-431d-bf7c-c170475bed5f.json deleted file mode 100644 index 888c91fe2144812cd7b2994b053ce3bc434da5af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-VL-7B-Instruct/6dd0eebe-ef61-431d-bf7c-c170475bed5f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-VL-7B-Instruct/1762652579.841773", - "retrieved_timestamp": "1762652579.841774", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2-VL-7B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-VL-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4599218961245052 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5464507159069989 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1986404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40949135638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2VLForConditionalGeneration", - "params_billions": 8.291 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-0.5B-Instruct/14d1ea99-ae05-42cd-9f2f-de1a98d9846d.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-0.5B-Instruct/14d1ea99-ae05-42cd-9f2f-de1a98d9846d.json deleted file mode 100644 index edb8fb1389b7e884ae86b90890461874a0ede874..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-0.5B-Instruct/14d1ea99-ae05-42cd-9f2f-de1a98d9846d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-0.5B-Instruct/1762652579.842413", - "retrieved_timestamp": "1762652579.8424141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-0.5B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-0.5B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31529120511354314 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3321916429549138 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17195811170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-0.5B-Instruct/883755e2-69eb-459b-ae7f-5548914aa65e.json 
b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-0.5B-Instruct/883755e2-69eb-459b-ae7f-5548914aa65e.json deleted file mode 100644 index 4879eaba0191a0401fa88ffc59e51b13c3716195..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-0.5B-Instruct/883755e2-69eb-459b-ae7f-5548914aa65e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-0.5B-Instruct/1762652579.842189", - "retrieved_timestamp": "1762652579.84219", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-0.5B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-0.5B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.307122878407071 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3340729214937266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33288541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16971409574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.5 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-1.5B-Instruct/9744dd76-a8cd-4400-92a7-f10b375710ae.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-1.5B-Instruct/9744dd76-a8cd-4400-92a7-f10b375710ae.json deleted file mode 100644 index 791b75f7b5732abb1621fff89500d379d4fb934f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-1.5B-Instruct/9744dd76-a8cd-4400-92a7-f10b375710ae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-1.5B-Instruct/1762652579.842835", - "retrieved_timestamp": "1762652579.842836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-1.5B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-1.5B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4475569267321817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4288982740422907 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3663125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27992021276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.5 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-14B-Instruct-1M/52ff136b-084f-4ca3-a48e-83fb0bbd8ebc.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-14B-Instruct-1M/52ff136b-084f-4ca3-a48e-83fb0bbd8ebc.json deleted file mode 100644 index f93c2aea9aa88a88a51dd74ecd880c94846015bb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-14B-Instruct-1M/52ff136b-084f-4ca3-a48e-83fb0bbd8ebc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-14B-Instruct-1M/1762652579.843473", - "retrieved_timestamp": "1762652579.843473", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-14B-Instruct-1M", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-14B-Instruct-1M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.8413564896696322 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6198222551365405 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302114803625377 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.418 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4849567819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-14B-Instruct/1f3e04ab-9f97-4eda-9d40-669eda073ac3.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-14B-Instruct/1f3e04ab-9f97-4eda-9d40-669eda073ac3.json deleted file mode 100644 index 518abf00a6aad297a42e84952c7e3a2090264976..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-14B-Instruct/1f3e04ab-9f97-4eda-9d40-669eda073ac3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-14B-Instruct/1762652579.843263", - "retrieved_timestamp": "1762652579.843264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-14B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-14B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8157776920792386 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6390453705906222 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.547583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4100625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4904421542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-32B-Instruct/c921186d-6e97-46d6-b968-894159271620.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-32B-Instruct/c921186d-6e97-46d6-b968-894159271620.json deleted file mode 100644 index 6a91b08d6eb3cd83bb6f52982a16f1331a946df3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-32B-Instruct/c921186d-6e97-46d6-b968-894159271620.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-32B-Instruct/1762652579.843922", - "retrieved_timestamp": "1762652579.843922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-32B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-32B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8346121623957765 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6912525080134339 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6253776435045317 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42612500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.566655585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-3B-Instruct/9fb4e863-fd72-4b60-bc20-e32e64ce99e8.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-3B-Instruct/9fb4e863-fd72-4b60-bc20-e32e64ce99e8.json deleted file mode 100644 index 55f7a982841a920af1fee310eb8f4a2b7726d3cb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-3B-Instruct/9fb4e863-fd72-4b60-bc20-e32e64ce99e8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-3B-Instruct/1762652579.844352", - "retrieved_timestamp": "1762652579.844352", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-3B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-3B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6474919879253713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.469276665604885 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3678247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39679166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3254654255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-72B-Instruct/9ed2a831-aa5a-4e81-b8b5-397bc8b55835.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-72B-Instruct/9ed2a831-aa5a-4e81-b8b5-397bc8b55835.json deleted file mode 100644 index 0c5cdbb69e301ad3cca2399675e58706478ba86a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-72B-Instruct/9ed2a831-aa5a-4e81-b8b5-397bc8b55835.json +++ /dev/null @@ 
-1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-72B-Instruct/1762652579.844789", - "retrieved_timestamp": "1762652579.844789", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-72B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-72B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.863837949972739 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7272747321744824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5981873111782477 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42060416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5625831117021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-7B-Instruct-1M/f338f8b3-d2fa-46e6-b2a1-b83303521b3f.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-7B-Instruct-1M/f338f8b3-d2fa-46e6-b2a1-b83303521b3f.json deleted file mode 100644 index 8d7e125c322d3f742e7d2e3fa4eef2975fd71aef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-7B-Instruct-1M/f338f8b3-d2fa-46e6-b2a1-b83303521b3f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-7B-Instruct-1M/1762652579.845428", - "retrieved_timestamp": "1762652579.845428", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-7B-Instruct-1M", - "developer": "Qwen", - "inference_platform": "unknown", - "id": 
"Qwen/Qwen2.5-7B-Instruct-1M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7447616767953474 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5403941270576822 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4335347432024169 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40869791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35048204787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-7B-Instruct/7a336f2b-3b33-4fde-bce6-2d1e884a1b26.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-7B-Instruct/7a336f2b-3b33-4fde-bce6-2d1e884a1b26.json deleted file mode 100644 index d98156e9df77bb1bc057fde742dc54b04685a83c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-7B-Instruct/7a336f2b-3b33-4fde-bce6-2d1e884a1b26.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-7B-Instruct/1762652579.845207", - "retrieved_timestamp": "1762652579.8452082", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-7B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7585251576926999 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5394231968299095 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286901595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-14B-Instruct/f2295cf4-86e0-4c73-8f3d-21c6e5ccd9d9.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-14B-Instruct/f2295cf4-86e0-4c73-8f3d-21c6e5ccd9d9.json deleted file mode 100644 index 68048525ac950479592f9fb28f789a3d66a531c3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-14B-Instruct/f2295cf4-86e0-4c73-8f3d-21c6e5ccd9d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-14B-Instruct/1762652579.846175", - "retrieved_timestamp": "1762652579.846175", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Coder-14B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Coder-14B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6907560827493273 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6140296423661326 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3939494680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-32B-Instruct/c0ca7adb-6221-415f-8ed6-0de6439db168.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-32B-Instruct/c0ca7adb-6221-415f-8ed6-0de6439db168.json deleted file mode 100644 index 729a5f684f89e67bbd3700b239827182591d613e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-32B-Instruct/c0ca7adb-6221-415f-8ed6-0de6439db168.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-32B-Instruct/1762652579.846655", - "retrieved_timestamp": "1762652579.846655", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Coder-32B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Coder-32B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7265267268625026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6625222222405129 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4954682779456193 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4385833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44132313829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-7B-Instruct/7629f304-5235-485b-a7f6-f5a7f91fd35c.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-7B-Instruct/7629f304-5235-485b-a7f6-f5a7f91fd35c.json deleted file mode 100644 index 4b16f461b3c1674a1a25051f4e18226e2c5087ef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-7B-Instruct/7629f304-5235-485b-a7f6-f5a7f91fd35c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-7B-Instruct/1762652579.847122", - "retrieved_timestamp": "1762652579.847123", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Coder-7B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Coder-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6101477413263474 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5007976986224548 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4072708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351894946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-7B-Instruct/81749833-4f2a-4883-a789-c465c11b33b6.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-7B-Instruct/81749833-4f2a-4883-a789-c465c11b33b6.json deleted file mode 100644 index 9823625092fa4a13e1fc0333f5bc9b41eb9a20c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-7B-Instruct/81749833-4f2a-4883-a789-c465c11b33b6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-7B-Instruct/1762652579.8473449", - "retrieved_timestamp": 
"1762652579.8473458", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Coder-7B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Coder-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6147189457306613 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4999048550311305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4099375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33543882978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-1.5B-Instruct/393c9602-bd87-48d7-ad95-6baf85ed3341.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-1.5B-Instruct/393c9602-bd87-48d7-ad95-6baf85ed3341.json deleted file mode 100644 index 4fecfdea97658c5639af2289cfab265323cb3416..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-1.5B-Instruct/393c9602-bd87-48d7-ad95-6baf85ed3341.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-1.5B-Instruct/1762652579.84755", - "retrieved_timestamp": "1762652579.84755", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Math-1.5B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Math-1.5B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1855731680829089 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37515353898426174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2628398791540785 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3685416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1801030585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-72B-Instruct/64574dc3-4982-49c3-8526-09ebd5781175.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-72B-Instruct/64574dc3-4982-49c3-8526-09ebd5781175.json deleted file mode 100644 index ee7447100ff04509dd82f7559c2ceeff17bd781b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-72B-Instruct/64574dc3-4982-49c3-8526-09ebd5781175.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-72B-Instruct/1762652579.847774", - "retrieved_timestamp": "1762652579.847775", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Math-72B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Math-72B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4003466358151926 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6452266637803764 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6238670694864048 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44727083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4812167553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-7B-Instruct/6ba8109e-8906-420f-a780-d0bef4015e1a.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-7B-Instruct/6ba8109e-8906-420f-a780-d0bef4015e1a.json deleted file mode 100644 index fcb9a04333201ca6111dcfbe2835780ba1d1ba93..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-7B-Instruct/6ba8109e-8906-420f-a780-d0bef4015e1a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-7B-Instruct/1762652579.848376", - "retrieved_timestamp": "1762652579.848377", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Math-7B-Instruct", - "developer": "Qwen", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Math-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26358395723347383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.438762734452786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5808157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3647291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2819980053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/RDson/RDson_WomboCombo-R1-Coder-14B-Preview/faa623a7-1bf8-4da6-b381-7701f0446b70.json b/leaderboard_data/HFOpenLLMv2/RDson/RDson_WomboCombo-R1-Coder-14B-Preview/faa623a7-1bf8-4da6-b381-7701f0446b70.json deleted file mode 100644 index 7aced4c7c9a1eb1b56e8abe88eab76072a33c0e3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/RDson/RDson_WomboCombo-R1-Coder-14B-Preview/faa623a7-1bf8-4da6-b381-7701f0446b70.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/RDson_WomboCombo-R1-Coder-14B-Preview/1762652579.848609", - "retrieved_timestamp": "1762652579.8486102", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "RDson/WomboCombo-R1-Coder-14B-Preview", - "developer": "RDson", - "inference_platform": "unknown", - "id": "RDson/WomboCombo-R1-Coder-14B-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.628557782240012 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6392098699331132 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5989425981873112 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4843854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5167885638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/RLHFlow/RLHFlow_LLaMA3-iterative-DPO-final/8ccda2e0-9801-41b0-8491-eb36615860f2.json b/leaderboard_data/HFOpenLLMv2/RLHFlow/RLHFlow_LLaMA3-iterative-DPO-final/8ccda2e0-9801-41b0-8491-eb36615860f2.json deleted file mode 100644 index 4d54c773d17c3d02c29188fea72f4aae76c08101..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/RLHFlow/RLHFlow_LLaMA3-iterative-DPO-final/8ccda2e0-9801-41b0-8491-eb36615860f2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/RLHFlow_LLaMA3-iterative-DPO-final/1762652579.849687", - "retrieved_timestamp": "1762652579.849688", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "RLHFlow/LLaMA3-iterative-DPO-final", - "developer": "RLHFlow", - "inference_platform": "unknown", - "id": "RLHFlow/LLaMA3-iterative-DPO-final" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.53401086893886 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5058257182733729 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08836858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3672708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32571476063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/RWKV/RWKV_rwkv-raven-14b/9a90826f-9062-48aa-b047-d24f4e0d85ef.json b/leaderboard_data/HFOpenLLMv2/RWKV/RWKV_rwkv-raven-14b/9a90826f-9062-48aa-b047-d24f4e0d85ef.json deleted file mode 100644 index 912a3f2527b6607c1101286d4e68c42404f3f11e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/RWKV/RWKV_rwkv-raven-14b/9a90826f-9062-48aa-b047-d24f4e0d85ef.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/RWKV_rwkv-raven-14b/1762652579.849975", - "retrieved_timestamp": 
"1762652579.849976", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "RWKV/rwkv-raven-14b", - "developer": "RWKV", - "inference_platform": "unknown", - "id": "RWKV/rwkv-raven-14b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07683723631076655 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3307041176552897 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22902684563758388 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11502659574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "RwkvForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-2.0-mini-instruct/549f9869-4b59-469b-b9fd-ea26114405a1.json b/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-2.0-mini-instruct/549f9869-4b59-469b-b9fd-ea26114405a1.json deleted file mode 100644 index 0b1c3d3ee192c696eb5332882d7559ff41443216..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-2.0-mini-instruct/549f9869-4b59-469b-b9fd-ea26114405a1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Rakuten_RakutenAI-2.0-mini-instruct/1762652579.850244", - "retrieved_timestamp": "1762652579.850244", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Rakuten/RakutenAI-2.0-mini-instruct", - "developer": "Rakuten", - "inference_platform": "unknown", - "id": "Rakuten/RakutenAI-2.0-mini-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6793906833867471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2867197270809481 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3249166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178523936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 1.535 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-7B-chat/91e22241-7b65-44b9-a437-34b56400af7a.json b/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-7B-chat/91e22241-7b65-44b9-a437-34b56400af7a.json deleted file mode 100644 index e8814cac3114c877ee4a413fa12f1169855b5663..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-7B-chat/91e22241-7b65-44b9-a437-34b56400af7a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Rakuten_RakutenAI-7B-chat/1762652579.850715", - "retrieved_timestamp": "1762652579.8507159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Rakuten/RakutenAI-7B-chat", - "developer": "Rakuten", - "inference_platform": "unknown", - "id": "Rakuten/RakutenAI-7B-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26855521128383797 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4316204035758174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37895833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2798371010638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.373 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-7B/cab9a80e-94a6-4e7b-8980-1fa4482bac8a.json b/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-7B/cab9a80e-94a6-4e7b-8980-1fa4482bac8a.json deleted file mode 100644 index 6de5c6673dc7f9a92c9e03ed880eecb6cc93e140..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-7B/cab9a80e-94a6-4e7b-8980-1fa4482bac8a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Rakuten_RakutenAI-7B/1762652579.8505", - "retrieved_timestamp": "1762652579.850501", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Rakuten/RakutenAI-7B", - "developer": "Rakuten", - "inference_platform": "unknown", - "id": "Rakuten/RakutenAI-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555971488982566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43149052613615435 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37381250000000005 - } - }, - { 
- "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28773271276595747 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.373 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_L3-Pneuma-8B/5eddb8a8-7281-4ae2-a4bc-f174598727e3.json b/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_L3-Pneuma-8B/5eddb8a8-7281-4ae2-a4bc-f174598727e3.json deleted file mode 100644 index e7a2835a3f28784124dbc5fc55f6c8660186020f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_L3-Pneuma-8B/5eddb8a8-7281-4ae2-a4bc-f174598727e3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Replete-AI_L3-Pneuma-8B/1762652579.85093", - "retrieved_timestamp": "1762652579.850931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Replete-AI/L3-Pneuma-8B", - "developer": "Replete-AI", - "inference_platform": "unknown", - "id": "Replete-AI/L3-Pneuma-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24132745559559746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4908680380935449 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4105208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3175698138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_L3.1-Pneuma-8B/d20e8883-4cde-45dc-9d60-10284a2a5cdb.json 
b/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_L3.1-Pneuma-8B/d20e8883-4cde-45dc-9d60-10284a2a5cdb.json deleted file mode 100644 index 88c4a6c75b6e85b6a1ea43721ba8ccc7504f53dc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_L3.1-Pneuma-8B/d20e8883-4cde-45dc-9d60-10284a2a5cdb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Replete-AI_L3.1-Pneuma-8B/1762652579.851203", - "retrieved_timestamp": "1762652579.8512042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Replete-AI/L3.1-Pneuma-8B", - "developer": "Replete-AI", - "inference_platform": "unknown", - "id": "Replete-AI/L3.1-Pneuma-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.707642388861554 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504990389092237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21978851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36909906914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_Llama3-8B-Instruct-Replete-Adapted/861d8edd-2acf-4593-9768-8f77488ce8a4.json b/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_Llama3-8B-Instruct-Replete-Adapted/861d8edd-2acf-4593-9768-8f77488ce8a4.json deleted file mode 100644 index 6722fef6a313ff3520d7d80103d7b6d373d2a69b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_Llama3-8B-Instruct-Replete-Adapted/861d8edd-2acf-4593-9768-8f77488ce8a4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Replete-AI_Llama3-8B-Instruct-Replete-Adapted/1762652579.8514109", - "retrieved_timestamp": "1762652579.851412", - "source_data": 
[ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Replete-AI/Llama3-8B-Instruct-Replete-Adapted", - "developer": "Replete-AI", - "inference_platform": "unknown", - "id": "Replete-AI/Llama3-8B-Instruct-Replete-Adapted" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6915306941138402 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48702618293318983 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36339583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3390957446808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_Replete-Coder-Instruct-8b-Merged/398e665d-af8e-420c-95ce-5f9f4a4988af.json b/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_Replete-Coder-Instruct-8b-Merged/398e665d-af8e-420c-95ce-5f9f4a4988af.json deleted file mode 100644 index c36eb521249f51cc54ce7bf900b932c833f6c652..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_Replete-Coder-Instruct-8b-Merged/398e665d-af8e-420c-95ce-5f9f4a4988af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-Coder-Instruct-8b-Merged/1762652579.851615", - "retrieved_timestamp": "1762652579.851616", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Replete-AI/Replete-Coder-Instruct-8b-Merged", - "developer": "Replete-AI", - "inference_platform": "unknown", - "id": 
"Replete-AI/Replete-Coder-Instruct-8b-Merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5387571643239937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4461693860075828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051861702127658 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/RezVortex/RezVortex_JAJUKA-WEWILLNEVERFORGETYOU-3B/76f26fef-fa87-4cf5-a317-ea4b743e7432.json b/leaderboard_data/HFOpenLLMv2/RezVortex/RezVortex_JAJUKA-WEWILLNEVERFORGETYOU-3B/76f26fef-fa87-4cf5-a317-ea4b743e7432.json deleted file mode 100644 index db4cf77b08f94e0ebc24071376a76c3aef4ae264..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/RezVortex/RezVortex_JAJUKA-WEWILLNEVERFORGETYOU-3B/76f26fef-fa87-4cf5-a317-ea4b743e7432.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/RezVortex_JAJUKA-WEWILLNEVERFORGETYOU-3B/1762652579.853197", - "retrieved_timestamp": "1762652579.853197", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B", - "developer": "RezVortex", - "inference_platform": "unknown", - "id": "RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6858103166265509 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.46189139399865614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36302083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3143284574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/RezVortex/RezVortex_Jajuka-3b/a41d111c-dd5d-4f77-b52d-9a2dc9f31e50.json b/leaderboard_data/HFOpenLLMv2/RezVortex/RezVortex_Jajuka-3b/a41d111c-dd5d-4f77-b52d-9a2dc9f31e50.json deleted file mode 100644 index 8ca85c344a5cbe7a36b9e88b700adce908a6086b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/RezVortex/RezVortex_Jajuka-3b/a41d111c-dd5d-4f77-b52d-9a2dc9f31e50.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/RezVortex_Jajuka-3b/1762652579.85344", - "retrieved_timestamp": "1762652579.853441", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "RezVortex/Jajuka-3b", - "developer": "RezVortex", - "inference_platform": "unknown", - "id": "RezVortex/Jajuka-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6925047762159957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4593872338446621 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1593655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3670833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3137466755319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-DARE-0/93930443-dc12-422f-9920-470917ef8d7d.json b/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-DARE-0/93930443-dc12-422f-9920-470917ef8d7d.json deleted file mode 100644 index da628214c0e3c87db352944552bdc09bf486c4a3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-DARE-0/93930443-dc12-422f-9920-470917ef8d7d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-7B-DARE-0/1762652579.8536398", - "retrieved_timestamp": "1762652579.853641", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Ro-xe/FMixIA-7B-DARE-0", - "developer": "Ro-xe", - "inference_platform": "unknown", - "id": "Ro-xe/FMixIA-7B-DARE-0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341256754300811 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5035332799973222 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45448958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3016123670212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-SLERP-27/7f08546a-3f05-4612-879c-3f293daeabd4.json b/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-SLERP-27/7f08546a-3f05-4612-879c-3f293daeabd4.json deleted file mode 100644 index 886b2725975398396c99c5536dbd12fa609d56a8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-SLERP-27/7f08546a-3f05-4612-879c-3f293daeabd4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-7B-SLERP-27/1762652579.853882", - "retrieved_timestamp": "1762652579.8538828", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Ro-xe/FMixIA-7B-SLERP-27", - "developer": "Ro-xe", - "inference_platform": "unknown", - "id": "Ro-xe/FMixIA-7B-SLERP-27" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3765409114482905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5150591725181265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44115624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30078125 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-TIES-1/b5d64806-0d01-4c99-9ba6-6aff88c894bd.json b/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-TIES-1/b5d64806-0d01-4c99-9ba6-6aff88c894bd.json deleted file mode 100644 index a5317872a6353ee1dd15d93bb16a5558b97ae2cc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-TIES-1/b5d64806-0d01-4c99-9ba6-6aff88c894bd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-7B-TIES-1/1762652579.8540852", - "retrieved_timestamp": "1762652579.8540852", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Ro-xe/FMixIA-7B-TIES-1", - "developer": "Ro-xe", - "inference_platform": "unknown", - "id": "Ro-xe/FMixIA-7B-TIES-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34529160405501846 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5091539642456672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46890625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2992021276595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-FrankenMerge-9.5B-PT-9/0d1c7e5e-4ddf-447b-9581-c62cedc2fedc.json b/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-FrankenMerge-9.5B-PT-9/0d1c7e5e-4ddf-447b-9581-c62cedc2fedc.json deleted file mode 100644 index 9a8a5c6d54262a491440759826d5a07f272de6e9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-FrankenMerge-9.5B-PT-9/0d1c7e5e-4ddf-447b-9581-c62cedc2fedc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-FrankenMerge-9.5B-PT-9/1762652579.8542862", - "retrieved_timestamp": "1762652579.8542871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9", - "developer": "Ro-xe", - "inference_platform": "unknown", - "id": "Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19401632113902223 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5087851148631056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41703124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36569148936170215 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.141 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/RubielLabarta/RubielLabarta_LogoS-7Bx2-MoE-13B-v0.2/63522d1e-d4bf-4071-a086-5ef016243ec1.json b/leaderboard_data/HFOpenLLMv2/RubielLabarta/RubielLabarta_LogoS-7Bx2-MoE-13B-v0.2/63522d1e-d4bf-4071-a086-5ef016243ec1.json deleted file mode 100644 index 2e8ae1bb097989b02a3fca9a01486c7de91ab67a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/RubielLabarta/RubielLabarta_LogoS-7Bx2-MoE-13B-v0.2/63522d1e-d4bf-4071-a086-5ef016243ec1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/RubielLabarta_LogoS-7Bx2-MoE-13B-v0.2/1762652579.85476", - "retrieved_timestamp": "1762652579.85476", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2", - "developer": "RubielLabarta", - "inference_platform": "unknown", - "id": "RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4378903531518593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206958722481815 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4226145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087599734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Evil-Alpaca-3B-L3.2/f9c7c5b5-6274-4971-a81a-6f88ec07ca93.json b/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Evil-Alpaca-3B-L3.2/f9c7c5b5-6274-4971-a81a-6f88ec07ca93.json deleted file mode 100644 index 2c7eab2779051b2205b77f4db0a97ef36556c0b7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Evil-Alpaca-3B-L3.2/f9c7c5b5-6274-4971-a81a-6f88ec07ca93.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SaisExperiments_Evil-Alpaca-3B-L3.2/1762652579.8550148", - "retrieved_timestamp": "1762652579.8550148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SaisExperiments/Evil-Alpaca-3B-L3.2", - "developer": "SaisExperiments", - "inference_platform": "unknown", - "id": "SaisExperiments/Evil-Alpaca-3B-L3.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32510848991786234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4340757699220565 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2621343085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Gemma-2-2B-Opus-Instruct/369f84c6-022e-46ed-8cfc-2e0b4a8e175a.json b/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Gemma-2-2B-Opus-Instruct/369f84c6-022e-46ed-8cfc-2e0b4a8e175a.json deleted file mode 100644 index 8445c054b702b81cd8c0d7e76f9faffec9e7d404..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Gemma-2-2B-Opus-Instruct/369f84c6-022e-46ed-8cfc-2e0b4a8e175a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SaisExperiments_Gemma-2-2B-Opus-Instruct/1762652579.855459", - "retrieved_timestamp": "1762652579.8554602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SaisExperiments/Gemma-2-2B-Opus-Instruct", - "developer": "SaisExperiments", - "inference_platform": "unknown", - "id": "SaisExperiments/Gemma-2-2B-Opus-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.474959773401242 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4292846281445681 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4056875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2650432180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Not-So-Small-Alpaca-24B/98275290-dbd0-462e-9028-4daa65cd5ce3.json b/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Not-So-Small-Alpaca-24B/98275290-dbd0-462e-9028-4daa65cd5ce3.json deleted file mode 100644 index 80591659d687351c7a6b7b7a9ae63dcb1af7b1c9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Not-So-Small-Alpaca-24B/98275290-dbd0-462e-9028-4daa65cd5ce3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SaisExperiments_Not-So-Small-Alpaca-24B/1762652579.855924", - "retrieved_timestamp": "1762652579.855925", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SaisExperiments/Not-So-Small-Alpaca-24B", - "developer": "SaisExperiments", - "inference_platform": "unknown", - "id": "SaisExperiments/Not-So-Small-Alpaca-24B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6243611395541607 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5338637679203099 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42816666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36943151595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_QwOwO-7B-V1/9064bdc6-b84b-4022-9d7a-63b1b76fc1bc.json b/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_QwOwO-7B-V1/9064bdc6-b84b-4022-9d7a-63b1b76fc1bc.json deleted file mode 100644 index 4f71c9a34da49d41cdfc82631af3e62b94b0cb76..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_QwOwO-7B-V1/9064bdc6-b84b-4022-9d7a-63b1b76fc1bc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SaisExperiments_QwOwO-7B-V1/1762652579.856126", - "retrieved_timestamp": "1762652579.856126", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SaisExperiments/QwOwO-7B-V1", - "developer": "SaisExperiments", - "inference_platform": "unknown", - "id": "SaisExperiments/QwOwO-7B-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45562551806983254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431230107025949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859516616314199 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38348958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42237367021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Anemoi-3B/b50b5452-b824-4fd6-b0e4-cdaea09139a2.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Anemoi-3B/b50b5452-b824-4fd6-b0e4-cdaea09139a2.json deleted file mode 100644 index ac7cdf508140bb616e8bca4daa1b1bee8daaf90f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Anemoi-3B/b50b5452-b824-4fd6-b0e4-cdaea09139a2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Anemoi-3B/1762652579.856576", - "retrieved_timestamp": "1762652579.856576", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, 
- "model_info": { - "name": "Sakalti/Anemoi-3B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Anemoi-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3803629924156793 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4921954661921298 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43706249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3765791223404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Euphrates-14B/db8c1ba2-4029-45c5-b8a6-5343356266eb.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Euphrates-14B/db8c1ba2-4029-45c5-b8a6-5343356266eb.json deleted file mode 100644 index 2e0682aafb5588aff045146b7e61d3329c822b8e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Euphrates-14B/db8c1ba2-4029-45c5-b8a6-5343356266eb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Euphrates-14B/1762652579.856813", - "retrieved_timestamp": "1762652579.8568141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Euphrates-14B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Euphrates-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26468326263203856 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.6137691668744961 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30513595166163143 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45157291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5255152925531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Magro-7B-v1.1/9e6c7958-689f-4437-b81a-c055d53ca33e.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Magro-7B-v1.1/9e6c7958-689f-4437-b81a-c055d53ca33e.json deleted file mode 100644 index 103a4078035eda96869f0495773eb1a13450ba4c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Magro-7B-v1.1/9e6c7958-689f-4437-b81a-c055d53ca33e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Magro-7B-v1.1/1762652579.857256", - "retrieved_timestamp": "1762652579.857256", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Magro-7B-v1.1", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Magro-7B-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1204016454119514 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41790625208343796 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4433229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27642952127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Neptuno-3B/4c2150fc-f473-4bdc-8823-960778ccbc75.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Neptuno-3B/4c2150fc-f473-4bdc-8823-960778ccbc75.json deleted file mode 100644 index 1d612a155afa316b287eb35c26c133197950e3fa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Neptuno-3B/4c2150fc-f473-4bdc-8823-960778ccbc75.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Neptuno-3B/1762652579.857454", - "retrieved_timestamp": "1762652579.857455", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Neptuno-3B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Neptuno-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42962229107656574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48335808848564965 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40019791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3773271276595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Neptuno-Alpha/511ac4a5-6fc8-4338-845d-859d73d57678.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Neptuno-Alpha/511ac4a5-6fc8-4338-845d-859d73d57678.json deleted file mode 100644 index 7fd98b55665de8c96542e6c6310d65263d8cf988..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Neptuno-Alpha/511ac4a5-6fc8-4338-845d-859d73d57678.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Neptuno-Alpha/1762652579.857697", - "retrieved_timestamp": "1762652579.857698", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Neptuno-Alpha", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Neptuno-Alpha" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3779649108809071 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49247749379461303 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18353474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43706249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767453457446808 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Oxyge1-33B/ee17e3a4-2036-4e57-9ada-51fe6d23ffac.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Oxyge1-33B/ee17e3a4-2036-4e57-9ada-51fe6d23ffac.json deleted file mode 100644 index 927bf0e356ccb02013feae7a9559885de518f9a9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Oxyge1-33B/ee17e3a4-2036-4e57-9ada-51fe6d23ffac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Oxyge1-33B/1762652579.8578959", - "retrieved_timestamp": "1762652579.857897", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Oxyge1-33B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Oxyge1-33B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4548265269484966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7033278292161169 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5007812500000001 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5909242021276596 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Qwen2.5-1B-Instruct/da01b31f-dde8-45dd-b793-c8258a09ddee.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Qwen2.5-1B-Instruct/da01b31f-dde8-45dd-b793-c8258a09ddee.json deleted file mode 100644 index 9596418cfec466771a28fb3abecb9af214e51922..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Qwen2.5-1B-Instruct/da01b31f-dde8-45dd-b793-c8258a09ddee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Qwen2.5-1B-Instruct/1762652579.858331", - "retrieved_timestamp": "1762652579.858331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Qwen2.5-1B-Instruct", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Qwen2.5-1B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17513198313807365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30271528035563927 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33688541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12134308510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.988 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-0.5B/7763650a-8a37-41f2-aadd-b1db7b41d0b3.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-0.5B/7763650a-8a37-41f2-aadd-b1db7b41d0b3.json deleted file mode 100644 index 3a8ba2f4a0917add07c6f862dec57d1a3f81c7d0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-0.5B/7763650a-8a37-41f2-aadd-b1db7b41d0b3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-0.5B/1762652579.858787", - "retrieved_timestamp": "1762652579.858787", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-0.5B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-0.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24247662867857286 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33055365550588683 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31958333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18907912234042554 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.5B-Alpha-1.1/e3f05df1-a653-41a0-983a-4a7d86b85c60.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.5B-Alpha-1.1/e3f05df1-a653-41a0-983a-4a7d86b85c60.json deleted file mode 100644 index a99b3ddb83ab47e45625de0ff4ae9a73e94f5e75..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.5B-Alpha-1.1/e3f05df1-a653-41a0-983a-4a7d86b85c60.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-1.5B-Alpha-1.1/1762652579.859199", - "retrieved_timestamp": "1762652579.859199", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-1.5B-Alpha-1.1", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-1.5B-Alpha-1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3439429602344003 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4243160272518483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42391666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.296625664893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.5B-Alpha/21472871-fe74-447a-894c-80d77ae4ad0a.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.5B-Alpha/21472871-fe74-447a-894c-80d77ae4ad0a.json deleted file mode 100644 index 0b5ea9300400c48ef08b417502a4009f1d8ccef5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.5B-Alpha/21472871-fe74-447a-894c-80d77ae4ad0a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-1.5B-Alpha/1762652579.858988", - "retrieved_timestamp": "1762652579.858989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-1.5B-Alpha", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-1.5B-Alpha" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3448671746521452 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4240819448548446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4226145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961269946808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.7B/6e2f01c1-ba87-4687-9db1-a0c0004bdfe1.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.7B/6e2f01c1-ba87-4687-9db1-a0c0004bdfe1.json deleted file mode 100644 index b5152c089c67c15ad02deba0d260e6c5ab5ec88d..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.7B/6e2f01c1-ba87-4687-9db1-a0c0004bdfe1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-1.7B/1762652579.859416", - "retrieved_timestamp": "1762652579.8594172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-1.7B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-1.7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17762980004166723 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2934008926922806 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39641666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11328125 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.684 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-14B/1169b5fd-9418-4986-940a-276d163431c0.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-14B/1169b5fd-9418-4986-940a-276d163431c0.json deleted file mode 100644 index 8d3c362058eecc4827e2e4267ef87b29d2245db1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-14B/1169b5fd-9418-4986-940a-276d163431c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-14B/1762652579.8596292", - "retrieved_timestamp": "1762652579.85963", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-14B", - "developer": "Sakalti", 
- "inference_platform": "unknown", - "id": "Sakalti/SJT-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5494233079340594 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6536135646865123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.476625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5380651595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2.4B/30b98827-5afb-4bfe-b765-9c81cb4580f4.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2.4B/30b98827-5afb-4bfe-b765-9c81cb4580f4.json deleted file mode 100644 index dc5b5c94e3ace4dbd7a3c0d68e8de79087845380..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2.4B/30b98827-5afb-4bfe-b765-9c81cb4580f4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-2.4B/1762652579.859841", - "retrieved_timestamp": "1762652579.859841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-2.4B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-2.4B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28042039566128985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.349012395546882 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36990624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1858377659574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.432 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-24B-Alpha/f86649f8-8962-4496-8cd8-fed702a7e63b.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-24B-Alpha/f86649f8-8962-4496-8cd8-fed702a7e63b.json deleted file mode 100644 index 79d30ce367ac0e90b7306c6e0eb167416dafd63d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-24B-Alpha/f86649f8-8962-4496-8cd8-fed702a7e63b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-24B-Alpha/1762652579.860041", - "retrieved_timestamp": "1762652579.860041", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-24B-Alpha", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-24B-Alpha" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3206370208823699 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6080838080485248 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25302114803625375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.45947916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48570478723404253 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 24.125 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2B-V1.1/b4e467a7-3f2d-438a-8c42-1f7da1aafd20.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2B-V1.1/b4e467a7-3f2d-438a-8c42-1f7da1aafd20.json deleted file mode 100644 index dc3b675e07af3f2b9a4c47edde392569f4e45382..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2B-V1.1/b4e467a7-3f2d-438a-8c42-1f7da1aafd20.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-2B-V1.1/1762652579.860439", - "retrieved_timestamp": "1762652579.860439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-2B-V1.1", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-2B-V1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3977235956151899 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39838417813569243 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42993750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21243351063829788 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2B/f720d81c-04e1-4f8a-b452-ae52cc7d9fb2.json 
b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2B/f720d81c-04e1-4f8a-b452-ae52cc7d9fb2.json deleted file mode 100644 index 71dc83a6b3601ec51b96ebf4b30fb1f4ea3b09f1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2B/f720d81c-04e1-4f8a-b452-ae52cc7d9fb2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-2B/1762652579.8602371", - "retrieved_timestamp": "1762652579.860238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-2B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-2B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21507378200951255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29364597509285106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35641666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11868351063829788 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-3.7B/e82f1a2e-f679-47b8-9fbb-a53116e2195b.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-3.7B/e82f1a2e-f679-47b8-9fbb-a53116e2195b.json deleted file mode 100644 index 646383ee4a0f4f9235e984ddf748dca7f56d152e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-3.7B/e82f1a2e-f679-47b8-9fbb-a53116e2195b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-3.7B/1762652579.860638", - "retrieved_timestamp": "1762652579.8606389", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-3.7B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-3.7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10776184966998675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3393045259885476 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36171875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1505152925531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.783 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-4B/5115cea0-d3bf-486b-9609-36698e845653.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-4B/5115cea0-d3bf-486b-9609-36698e845653.json deleted file mode 100644 index e8dc13ef46324169549d8f40defd568796387675..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-4B/5115cea0-d3bf-486b-9609-36698e845653.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-4B/1762652579.8608499", - "retrieved_timestamp": "1762652579.860851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-4B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-4B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077403511571519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4885743296577029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4779583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.328125 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7.5B/57934f76-c8bd-4264-a3b4-14234dda0719.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7.5B/57934f76-c8bd-4264-a3b4-14234dda0719.json deleted file mode 100644 index 2a10c73b36680bcbaa528fd9069b2177d86def5c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7.5B/57934f76-c8bd-4264-a3b4-14234dda0719.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-7.5B/1762652579.861058", - "retrieved_timestamp": "1762652579.861058", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-7.5B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-7.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42232831110342783 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367364587851736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21676737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { 
- "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43988541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951130319148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7B-V1.1-Multilingal/03cb237a-0519-449c-b9c7-d9fbb4d119cd.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7B-V1.1-Multilingal/03cb237a-0519-449c-b9c7-d9fbb4d119cd.json deleted file mode 100644 index 41d4bc8e6a099045da42cffd251695823af4b46a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7B-V1.1-Multilingal/03cb237a-0519-449c-b9c7-d9fbb4d119cd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-7B-V1.1-Multilingal/1762652579.861463", - "retrieved_timestamp": "1762652579.861464", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-7B-V1.1-Multilingal", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-7B-V1.1-Multilingal" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19494053555676716 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2919597646466201 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.362125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11369680851063829 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": 
"Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7B-V1.1/b1527426-9cc0-4eb5-af52-30e36e0e04fd.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7B-V1.1/b1527426-9cc0-4eb5-af52-30e36e0e04fd.json deleted file mode 100644 index cea7e30753a4e9ae3a8cf7b64c4e39dc5a7c74d9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7B-V1.1/b1527426-9cc0-4eb5-af52-30e36e0e04fd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-7B-V1.1/1762652579.861262", - "retrieved_timestamp": "1762652579.861263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-7B-V1.1", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-7B-V1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4702888336281067 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418885259534293 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44106249999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.441156914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-8B-V1.1/0cf37c9e-9218-4366-8065-befea0d2b749.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-8B-V1.1/0cf37c9e-9218-4366-8065-befea0d2b749.json deleted file mode 100644 index d5d7ba32a2c5fba8608c88de6eb29cbbd303a27e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-8B-V1.1/0cf37c9e-9218-4366-8065-befea0d2b749.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-8B-V1.1/1762652579.8618612", - 
"retrieved_timestamp": "1762652579.861862", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-8B-V1.1", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-8B-V1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620706392372239 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5120768392487195 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20694864048338368 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4266145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4231216755319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 8.545 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-8B/cb136400-7d0e-4194-9a45-1646ff8cac95.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-8B/cb136400-7d0e-4194-9a45-1646ff8cac95.json deleted file mode 100644 index 136152a74f0266944983ec8d55fa8307244b1d16..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-8B/cb136400-7d0e-4194-9a45-1646ff8cac95.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-8B/1762652579.861662", - "retrieved_timestamp": "1762652579.8616629", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-8B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6534871917623019 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5281955607099067 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2537764350453172 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4079791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4266123670212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 8.548 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-900M/ff057dd9-0102-485d-88d7-7e50145b5f7e.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-900M/ff057dd9-0102-485d-88d7-7e50145b5f7e.json deleted file mode 100644 index 04d3a669eb17c081a477ad5e487cc355c9646422..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-900M/ff057dd9-0102-485d-88d7-7e50145b5f7e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-900M/1762652579.862072", - "retrieved_timestamp": "1762652579.8620732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-900M", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-900M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2410027615615456 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31692036321713823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35945833333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11419547872340426 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.899 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-Moe2x7.5B/e95c6f08-ab57-49a2-a83b-6a77b5ab69d9.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-Moe2x7.5B/e95c6f08-ab57-49a2-a83b-6a77b5ab69d9.json deleted file mode 100644 index 1f4af8d6739345a5d507c7441430c8ff355d5f91..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-Moe2x7.5B/e95c6f08-ab57-49a2-a83b-6a77b5ab69d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJT-Moe2x7.5B/1762652579.862277", - "retrieved_timestamp": "1762652579.862278", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJT-Moe2x7.5B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJT-Moe2x7.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41166216749336204 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370697921185069 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43988541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3953623670212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 13.401 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-2/7f508bd9-7f95-453d-9e96-747ce91a64b3.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-2/7f508bd9-7f95-453d-9e96-747ce91a64b3.json deleted file mode 100644 index f591f012376fae63a98546e6a844c91937bc65b4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-2/7f508bd9-7f95-453d-9e96-747ce91a64b3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJTPass-2/1762652579.8624809", - "retrieved_timestamp": "1762652579.8624818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJTPass-2", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJTPass-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24002867945939 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33022032217255354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32225 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1901595744680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-4/f814a3bd-b82e-4769-9ef7-a4670420bca0.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-4/f814a3bd-b82e-4769-9ef7-a4670420bca0.json deleted file mode 100644 index 3021202b3ddcbf467cc860ae18d4a148dec33b23..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-4/f814a3bd-b82e-4769-9ef7-a4670420bca0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJTPass-4/1762652579.8627222", - "retrieved_timestamp": "1762652579.8627222", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJTPass-4", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJTPass-4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19129354557019818 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2963644180215358 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38981249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10829454787234043 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.167 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-5/5d5bda4e-8994-4cef-9772-d4bd435e9644.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-5/5d5bda4e-8994-4cef-9772-d4bd435e9644.json deleted file mode 100644 index 034438e4db724a0259b5a4ccb6b88d56344cc7a8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-5/5d5bda4e-8994-4cef-9772-d4bd435e9644.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SJTPass-5/1762652579.862921", - "retrieved_timestamp": "1762652579.862922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SJTPass-5", - 
"developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SJTPass-5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24247662867857286 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31029599812555747 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13272938829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.809 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba-Passthrough-2/df1e7d22-c300-4466-92b7-770078a1dc09.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba-Passthrough-2/df1e7d22-c300-4466-92b7-770078a1dc09.json deleted file mode 100644 index 3f68336968aa0fa0ccfbbaf5e561116a7d7a0bb3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba-Passthrough-2/df1e7d22-c300-4466-92b7-770078a1dc09.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Saba-Passthrough-2/1762652579.863117", - "retrieved_timestamp": "1762652579.8631182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Saba-Passthrough-2", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saba-Passthrough-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16913677930114318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.36724803467499195 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3844479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20769614361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.087 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1-1.8B/d8cc8e9e-b672-4b26-a454-f97cd7a08648.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1-1.8B/d8cc8e9e-b672-4b26-a454-f97cd7a08648.json deleted file mode 100644 index 768acf6d257c86a97dcde30b4c9fb1d39aba88f9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1-1.8B/d8cc8e9e-b672-4b26-a454-f97cd7a08648.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Saba1-1.8B/1762652579.863334", - "retrieved_timestamp": "1762652579.863334", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Saba1-1.8B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saba1-1.8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3332768166243345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4147375470428282 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4238854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2925531914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1-7B/1200ed26-8450-4788-a1bf-20f2c9b9b2c0.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1-7B/1200ed26-8450-4788-a1bf-20f2c9b9b2c0.json deleted file mode 100644 index eb0224d69fc14697f421c72452dcd9e4449ce5e6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1-7B/1200ed26-8450-4788-a1bf-20f2c9b9b2c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Saba1-7B/1762652579.863542", - "retrieved_timestamp": "1762652579.863542", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Saba1-7B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saba1-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45847351693506566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5489063327459239 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36631419939577037 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47932291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43758311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1.5-1.5B/a76090d4-a0fb-45c8-b28c-fa225ec3d11c.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1.5-1.5B/a76090d4-a0fb-45c8-b28c-fa225ec3d11c.json deleted file mode 100644 index 49b8aef61e3db1602719460dc0ffbb298f6052b0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1.5-1.5B/a76090d4-a0fb-45c8-b28c-fa225ec3d11c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Saba1.5-1.5B/1762652579.8637571", - "retrieved_timestamp": "1762652579.863758", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Saba1.5-1.5B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saba1.5-1.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3332768166243345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4147375470428282 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4238854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2925531914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1.5-Pro-3B/14e1dd44-92f1-4d97-be67-fa98c9802ff1.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1.5-Pro-3B/14e1dd44-92f1-4d97-be67-fa98c9802ff1.json deleted file mode 100644 index 4d4c3ce485317df00a5921e2bf4dbfc15dfa2c1d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1.5-Pro-3B/14e1dd44-92f1-4d97-be67-fa98c9802ff1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Saba1.5-Pro-3B/1762652579.863965", - "retrieved_timestamp": "1762652579.863966", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Saba1.5-Pro-3B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saba1.5-Pro-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23860468002677343 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3622910501405146 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44054166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19581117021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.9 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba2-14B-Preview/e3e0180f-bbd8-491a-a41b-54801e9f71de.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba2-14B-Preview/e3e0180f-bbd8-491a-a41b-54801e9f71de.json deleted file mode 100644 index 7e0b77da335d9b3b5e35174b3d48d9efa02e1df5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba2-14B-Preview/e3e0180f-bbd8-491a-a41b-54801e9f71de.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Saba2-14B-Preview/1762652579.864167", - "retrieved_timestamp": "1762652579.864168", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Saba2-14B-Preview", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saba2-14B-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4721871301480073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.649628096691823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4781458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383976063829787 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba2-3B/b759686f-082e-44b6-9cf8-44a48f66c136.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba2-3B/b759686f-082e-44b6-9cf8-44a48f66c136.json deleted file mode 100644 index ea9b643bf972cd9de786430e69bdc8639a21bca2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba2-3B/b759686f-082e-44b6-9cf8-44a48f66c136.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Saba2-3B/1762652579.864372", - "retrieved_timestamp": "1762652579.864373", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Saba2-3B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saba2-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28651533486704167 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28011877359000464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2617449664429531 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39269791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12101063829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Sailor-japanese/8449b01f-c489-4008-97d4-aa3f0394cda4.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Sailor-japanese/8449b01f-c489-4008-97d4-aa3f0394cda4.json deleted file mode 100644 index 61fe6c422dc38e83f9373693824817f7f76de26b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Sailor-japanese/8449b01f-c489-4008-97d4-aa3f0394cda4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Sailor-japanese/1762652579.864587", - "retrieved_timestamp": "1762652579.864588", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Sailor-japanese", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Sailor-japanese" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16046866757979938 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2912583602962783 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3911770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11643949468085106 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-1.5B/854baf47-af97-46dd-acfe-a3710976fd57.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-1.5B/854baf47-af97-46dd-acfe-a3710976fd57.json deleted file mode 100644 index 7945fdd32ea6ea0615220ee5919b35f3abb23799..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-1.5B/854baf47-af97-46dd-acfe-a3710976fd57.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Saka-1.5B/1762652579.8647912", - "retrieved_timestamp": "1762652579.8647912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Saka-1.5B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saka-1.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726266306732802 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3987868899865206 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37390625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24152260638297873 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-14B/53556d59-3b32-44bc-9932-c52f05939b57.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-14B/53556d59-3b32-44bc-9932-c52f05939b57.json deleted file mode 100644 index c5cd6c135a1f37ea42cf74c544f31d94e39ea79e..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-14B/53556d59-3b32-44bc-9932-c52f05939b57.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Saka-14B/1762652579.8649821", - "retrieved_timestamp": "1762652579.864983", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Saka-14B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saka-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7174341857382855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6496945295195891 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4093655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3959731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48859375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539561170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-24B/a5e13aa9-bf5f-4201-bc93-504521141f43.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-24B/a5e13aa9-bf5f-4201-bc93-504521141f43.json deleted file mode 100644 index a67f288ee8fe2ac50a2d585a669622c600e0edca..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-24B/a5e13aa9-bf5f-4201-bc93-504521141f43.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Saka-24B/1762652579.865175", - "retrieved_timestamp": "1762652579.865176", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Saka-24B", - "developer": "Sakalti", 
- "inference_platform": "unknown", - "id": "Sakalti/Saka-24B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38186123928952953 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6072116494463233 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45408333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4765625 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-7.2B/07f036d7-af59-49a8-8346-8a9a9dd21439.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-7.2B/07f036d7-af59-49a8-8346-8a9a9dd21439.json deleted file mode 100644 index af1b8f4d156fd9181e28a0f6f0cc9b9325b96a76..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-7.2B/07f036d7-af59-49a8-8346-8a9a9dd21439.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Saka-7.2B/1762652579.86556", - "retrieved_timestamp": "1762652579.865563", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Saka-7.2B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saka-7.2B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1544989516704566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2945156585364917 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37105208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11602393617021277 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.292 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-7.6B/10923a84-a611-4830-b84c-0e91c0628541.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-7.6B/10923a84-a611-4830-b84c-0e91c0628541.json deleted file mode 100644 index 5f9be49a1d7a8d8fe5999c7ff0f711bfebaa1cbe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-7.6B/10923a84-a611-4830-b84c-0e91c0628541.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Saka-7.6B/1762652579.865891", - "retrieved_timestamp": "1762652579.8658922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Saka-7.6B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Saka-7.6B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45242844541372446 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5655284792075981 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3255287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.4489375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45403922872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakaMoe-3x1.6B-Instruct/e806f2f4-0a10-49f6-a67e-dc1dd0a59ede.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakaMoe-3x1.6B-Instruct/e806f2f4-0a10-49f6-a67e-dc1dd0a59ede.json deleted file mode 100644 index 63079d6d2fd71c6411ffa22c52b6a1ceca8141c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakaMoe-3x1.6B-Instruct/e806f2f4-0a10-49f6-a67e-dc1dd0a59ede.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SakaMoe-3x1.6B-Instruct/1762652579.866188", - "retrieved_timestamp": "1762652579.8661902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SakaMoe-3x1.6B-Instruct", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SakaMoe-3x1.6B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23708094522533543 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.328247997224552 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33421875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18824800531914893 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 1.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakalFusion-7B-Alpha/2329f6f2-228a-400b-9b2d-4ad6dd278b79.json 
b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakalFusion-7B-Alpha/2329f6f2-228a-400b-9b2d-4ad6dd278b79.json deleted file mode 100644 index 3eb02591a37480939f2b9dbd5195d602688b4348..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakalFusion-7B-Alpha/2329f6f2-228a-400b-9b2d-4ad6dd278b79.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SakalFusion-7B-Alpha/1762652579.866478", - "retrieved_timestamp": "1762652579.8664792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SakalFusion-7B-Alpha", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SakalFusion-7B-Alpha" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5289653674472622 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.559133672829116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4581458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4473902925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakalFusion-7B-Beta/537a91f9-b1f3-49bf-bef7-a9ef8578c284.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakalFusion-7B-Beta/537a91f9-b1f3-49bf-bef7-a9ef8578c284.json deleted file mode 100644 index 5687503db02db18eadf46b41a473e48b023ccafe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakalFusion-7B-Beta/537a91f9-b1f3-49bf-bef7-a9ef8578c284.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_SakalFusion-7B-Beta/1762652579.866734", - "retrieved_timestamp": "1762652579.8667352", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/SakalFusion-7B-Beta", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/SakalFusion-7B-Beta" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18090222830977362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2881298650933641 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10895944148936171 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Tara-3.8B-v1.1/cd884e16-7e4d-4d17-8bad-5819604e0384.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Tara-3.8B-v1.1/cd884e16-7e4d-4d17-8bad-5819604e0384.json deleted file mode 100644 index 7973978eef9ff8f4ae88ffd6ee96e802d8a2eeac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Tara-3.8B-v1.1/cd884e16-7e4d-4d17-8bad-5819604e0384.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Tara-3.8B-v1.1/1762652579.866961", - "retrieved_timestamp": "1762652579.866962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Tara-3.8B-v1.1", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/Tara-3.8B-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40621661635571393 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4885743296577029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4779583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.328125 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-1.1-3B/9da5b03b-0207-4e98-a5bf-5a658225e78f.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-1.1-3B/9da5b03b-0207-4e98-a5bf-5a658225e78f.json deleted file mode 100644 index c894a3a746a8c76ecf8475c9135e48c15b686a65..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-1.1-3B/9da5b03b-0207-4e98-a5bf-5a658225e78f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_light-1.1-3B/1762652579.867201", - "retrieved_timestamp": "1762652579.867202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/light-1.1-3B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/light-1.1-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27345110972220377 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28027723572953045 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2617449664429531 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3900625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12092752659574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-3B/a1593642-8d60-4680-90aa-8c3789d536d6.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-3B/a1593642-8d60-4680-90aa-8c3789d536d6.json deleted file mode 100644 index 1fa7f3cc12778b7f166119f4a2e364ac1d505d0a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-3B/a1593642-8d60-4680-90aa-8c3789d536d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_light-3B/1762652579.8674219", - "retrieved_timestamp": "1762652579.867423", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/light-3B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/light-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5337360425892188 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4831034368803701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2590634441087613 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40149999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.3774933510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-3b-beta/2a4293ca-2434-4752-a08f-163257e0fde4.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-3b-beta/2a4293ca-2434-4752-a08f-163257e0fde4.json deleted file mode 100644 index f77fe325e2e6149d2913d0262e2dbeb171756df9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-3b-beta/2a4293ca-2434-4752-a08f-163257e0fde4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_light-3b-beta/1762652579.867648", - "retrieved_timestamp": "1762652579.867649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/light-3b-beta", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/light-3b-beta" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5485489612007252 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48152297262112204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.277190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40146875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3758311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-7b-beta/a66efce1-f6d2-4fad-964b-cc4e80012145.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-7b-beta/a66efce1-f6d2-4fad-964b-cc4e80012145.json deleted file mode 100644 index fd74e4de56c93411f29577eda52b763bac64e5ff..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-7b-beta/a66efce1-f6d2-4fad-964b-cc4e80012145.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_light-7b-beta/1762652579.867865", - "retrieved_timestamp": "1762652579.867866", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/light-7b-beta", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/light-7b-beta" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6233870574520051 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5548193064288276 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42906249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445561835106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_llama-3-yanyuedao-8b-instruct/cb550de6-4cd6-411e-9426-dc12421404ad.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_llama-3-yanyuedao-8b-instruct/cb550de6-4cd6-411e-9426-dc12421404ad.json deleted file mode 100644 index 51a2babb30ae1a3b7b82b87e78b221f2234ee332..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_llama-3-yanyuedao-8b-instruct/cb550de6-4cd6-411e-9426-dc12421404ad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_llama-3-yanyuedao-8b-instruct/1762652579.8681011", - "retrieved_timestamp": "1762652579.8681011", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/llama-3-yanyuedao-8b-instruct", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/llama-3-yanyuedao-8b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21857116894284942 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43497849055247495 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41985416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29105718085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_magro-7B/c2c87be8-4137-4bcc-8cbe-4589d193e94d.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_magro-7B/c2c87be8-4137-4bcc-8cbe-4589d193e94d.json deleted file mode 100644 index a73d9cc96e36757600076171228ebdaed8410c10..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_magro-7B/c2c87be8-4137-4bcc-8cbe-4589d193e94d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_magro-7B/1762652579.868387", - "retrieved_timestamp": "1762652579.8683882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/magro-7B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/magro-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13439008497453425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185526485966236 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44598958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2765126329787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_mergekit-01/dd01becb-c2c0-4593-ac1e-db2ff11aa17b.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_mergekit-01/dd01becb-c2c0-4593-ac1e-db2ff11aa17b.json deleted file mode 100644 index 003d6d32f0e2be5ef05c7a1b628d357f2363f312..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_mergekit-01/dd01becb-c2c0-4593-ac1e-db2ff11aa17b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_mergekit-01/1762652579.868608", - "retrieved_timestamp": "1762652579.868609", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/mergekit-01", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/mergekit-01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6233870574520051 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5548193064288276 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", 
- "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42906249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445561835106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_mergekit-della_linear-vmeykci/a4bd1768-2382-47fe-a8bd-6e42bda06d2f.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_mergekit-della_linear-vmeykci/a4bd1768-2382-47fe-a8bd-6e42bda06d2f.json deleted file mode 100644 index b418895008a2b2c0239bf5dc7d1bfde643484623..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_mergekit-della_linear-vmeykci/a4bd1768-2382-47fe-a8bd-6e42bda06d2f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_mergekit-della_linear-vmeykci/1762652579.868854", - "retrieved_timestamp": "1762652579.868856", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/mergekit-della_linear-vmeykci", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/mergekit-della_linear-vmeykci" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1126078804239418 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28155028620092587 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38968749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10887632978723404 - } - } - ], - "additional_details": { - "precision": "float16", - 
"architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_model-3/efd2a4d7-afcd-4653-ad4f-7d4f7206be95.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_model-3/efd2a4d7-afcd-4653-ad4f-7d4f7206be95.json deleted file mode 100644 index e427e1d1b3619d7156bb0751ce7835e41fe39402..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_model-3/efd2a4d7-afcd-4653-ad4f-7d4f7206be95.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_model-3/1762652579.869146", - "retrieved_timestamp": "1762652579.869148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/model-3", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/model-3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6263846593704703 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.554216994021922 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37084592145015105 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4263958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4454787234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_tara-3.8B/695d7b01-14e6-40e4-b398-541e87a812c8.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_tara-3.8B/695d7b01-14e6-40e4-b398-541e87a812c8.json deleted file mode 100644 index a1fb0e1e47898bf443494118579cc5992a6ccd14..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_tara-3.8B/695d7b01-14e6-40e4-b398-541e87a812c8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_tara-3.8B/1762652579.86961", - "retrieved_timestamp": 
"1762652579.869611", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/tara-3.8B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/tara-3.8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077403511571519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4885743296577029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4779583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.328125 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.2/f3f888bb-5e99-4521-83b2-4e182f492220.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.2/f3f888bb-5e99-4521-83b2-4e182f492220.json deleted file mode 100644 index 97366d5db746fe564a63e0b9cf92e99ae2f2bd7b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.2/f3f888bb-5e99-4521-83b2-4e182f492220.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B-v0.2/1762652579.870035", - "retrieved_timestamp": "1762652579.870036", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/ultiima-14B-v0.2", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/ultiima-14B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7069930007934502 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6472012505703305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3995468277945619 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4793541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5387300531914894 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.3/5cd3794f-990f-4965-9fbc-7faf3216e808.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.3/5cd3794f-990f-4965-9fbc-7faf3216e808.json deleted file mode 100644 index b88384d5d84be348ea7d40bc9e3471dfea532307..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.3/5cd3794f-990f-4965-9fbc-7faf3216e808.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B-v0.3/1762652579.870242", - "retrieved_timestamp": "1762652579.870243", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/ultiima-14B-v0.3", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/ultiima-14B-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7040452665593957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.639820771660141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.39652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47541666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5336602393617021 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.4/688f9751-e261-41c6-a7a4-2dc33a702e09.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.4/688f9751-e261-41c6-a7a4-2dc33a702e09.json deleted file mode 100644 index 9de242835791776c56e15374ade9b1d45943016b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.4/688f9751-e261-41c6-a7a4-2dc33a702e09.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B-v0.4/1762652579.8704672", - "retrieved_timestamp": "1762652579.8704839", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/ultiima-14B-v0.4", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/ultiima-14B-v0.4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3008284684636764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6420007859105136 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3959731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4885625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.527842420212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B/abf448a9-decf-432d-8883-6e1492a7c040.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B/abf448a9-decf-432d-8883-6e1492a7c040.json deleted file mode 100644 index 3d84876d181c921e62596866988e684fb249a0e4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B/abf448a9-decf-432d-8883-6e1492a7c040.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B/1762652579.869824", - "retrieved_timestamp": "1762652579.8698251", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/ultiima-14B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/ultiima-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5700563394016764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6491153472177067 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4697885196374622 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4717604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5380651595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-32B/18f686ca-453d-4a0c-9f1a-e2f4ba53399c.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-32B/18f686ca-453d-4a0c-9f1a-e2f4ba53399c.json deleted file mode 100644 index 
c9cdb3fcda0de2e50dccb78a7c34e5603898bd1d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-32B/18f686ca-453d-4a0c-9f1a-e2f4ba53399c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-32B/1762652579.870782", - "retrieved_timestamp": "1762652579.870784", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/ultiima-32B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/ultiima-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6854357549080883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7037285782797875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4994791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5910073138297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-72B-v1.5/258aae52-b934-4ba1-bdb0-e15bd8277234.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-72B-v1.5/258aae52-b934-4ba1-bdb0-e15bd8277234.json deleted file mode 100644 index affa15c00db9475fcebac25afe8d505ca4b31bde..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-72B-v1.5/258aae52-b934-4ba1-bdb0-e15bd8277234.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-72B-v1.5/1762652579.8712351", - "retrieved_timestamp": "1762652579.8712351", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/ultiima-72B-v1.5", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/ultiima-72B-v1.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6549610588793291 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7391727188223717 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395770392749245 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41359060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46909375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6053856382978723 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-72B/cce8480a-353b-4f9b-8f6f-b2f1e9ae601a.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-72B/cce8480a-353b-4f9b-8f6f-b2f1e9ae601a.json deleted file mode 100644 index 518d2e5a4ad84ba22659e1d316082d43232797aa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-72B/cce8480a-353b-4f9b-8f6f-b2f1e9ae601a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-72B/1762652579.8710139", - "retrieved_timestamp": "1762652579.8710148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/ultiima-72B", - "developer": "Sakalti", - "inference_platform": "unknown", - "id": "Sakalti/ultiima-72B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7140121544169471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7217809739144654 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5354984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41442953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46518750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.590591755319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Salesforce/Salesforce_LLaMA-3-8B-SFR-Iterative-DPO-R/1bf65062-4526-407d-ba4f-866b045dbf3b.json b/leaderboard_data/HFOpenLLMv2/Salesforce/Salesforce_LLaMA-3-8B-SFR-Iterative-DPO-R/1bf65062-4526-407d-ba4f-866b045dbf3b.json deleted file mode 100644 index ca2ab5dc0cf98b6871e39ca85df6d2dd1b22051e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Salesforce/Salesforce_LLaMA-3-8B-SFR-Iterative-DPO-R/1bf65062-4526-407d-ba4f-866b045dbf3b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Salesforce_LLaMA-3-8B-SFR-Iterative-DPO-R/1762652579.8714519", - "retrieved_timestamp": "1762652579.8714519", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R", - "developer": "Salesforce", - "inference_platform": "unknown", - "id": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38156203318306536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5011950469666927 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667674 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36333333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3172373670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SanjiWatsuki/SanjiWatsuki_Kunoichi-DPO-v2-7B/dc7243af-efa9-4169-8d31-36ef75dfe2e3.json b/leaderboard_data/HFOpenLLMv2/SanjiWatsuki/SanjiWatsuki_Kunoichi-DPO-v2-7B/dc7243af-efa9-4169-8d31-36ef75dfe2e3.json deleted file mode 100644 index 90fa8e33c0ae7b2da0a66ddc1e9150d8e438144d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SanjiWatsuki/SanjiWatsuki_Kunoichi-DPO-v2-7B/dc7243af-efa9-4169-8d31-36ef75dfe2e3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SanjiWatsuki_Kunoichi-DPO-v2-7B/1762652579.871708", - "retrieved_timestamp": "1762652579.871708", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SanjiWatsuki/Kunoichi-DPO-v2-7B", - "developer": "SanjiWatsuki", - "inference_platform": "unknown", - "id": "SanjiWatsuki/Kunoichi-DPO-v2-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431034100630772 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4415592450869275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41883333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3106715425531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SanjiWatsuki/SanjiWatsuki_Silicon-Maid-7B/5d7ffac9-a734-44ef-aa1e-43ddbe68fd6a.json b/leaderboard_data/HFOpenLLMv2/SanjiWatsuki/SanjiWatsuki_Silicon-Maid-7B/5d7ffac9-a734-44ef-aa1e-43ddbe68fd6a.json deleted file mode 100644 index 2a497900454dd318abab4efc3e092d93959827c6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SanjiWatsuki/SanjiWatsuki_Silicon-Maid-7B/5d7ffac9-a734-44ef-aa1e-43ddbe68fd6a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SanjiWatsuki_Silicon-Maid-7B/1762652579.87197", - "retrieved_timestamp": "1762652579.8719711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SanjiWatsuki/Silicon-Maid-7B", - "developer": "SanjiWatsuki", - "inference_platform": "unknown", - "id": "SanjiWatsuki/Silicon-Maid-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367835121920947 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4127972831009074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41883333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.308344414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_70B-L3.3-Cirrus-x1/660f8ede-1b7f-4438-8a97-51db77058725.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_70B-L3.3-Cirrus-x1/660f8ede-1b7f-4438-8a97-51db77058725.json deleted file mode 100644 index 
a80e9f8d575747754ab1492f93851c692671527b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_70B-L3.3-Cirrus-x1/660f8ede-1b7f-4438-8a97-51db77058725.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sao10K_70B-L3.3-Cirrus-x1/1762652579.8721752", - "retrieved_timestamp": "1762652579.8721762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sao10K/70B-L3.3-Cirrus-x1", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/70B-L3.3-Cirrus-x1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6680751517085777 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7028970787833794 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37386706948640486 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44966442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4841666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5378158244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_Fimbulvetr-11B-v2/135ade7c-f0d1-495a-a5b5-c95712cf0c0f.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_Fimbulvetr-11B-v2/135ade7c-f0d1-495a-a5b5-c95712cf0c0f.json deleted file mode 100644 index dd07ff86ec94eecde430de0e18f50752ff71fdbe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_Fimbulvetr-11B-v2/135ade7c-f0d1-495a-a5b5-c95712cf0c0f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sao10K_Fimbulvetr-11B-v2/1762652579.872427", - "retrieved_timestamp": "1762652579.872428", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sao10K/Fimbulvetr-11B-v2", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/Fimbulvetr-11B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5100056738343152 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4544495065184342 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43536458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33011968085106386 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-70B-Euryale-v2.1/09aab7d9-93ac-4aff-840a-d4ccfb0b469d.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-70B-Euryale-v2.1/09aab7d9-93ac-4aff-840a-d4ccfb0b469d.json deleted file mode 100644 index e1507b172713577be67e307f1c826310bcccfe31..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-70B-Euryale-v2.1/09aab7d9-93ac-4aff-840a-d4ccfb0b469d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sao10K_L3-70B-Euryale-v2.1/1762652579.872639", - "retrieved_timestamp": "1762652579.87264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sao10K/L3-70B-Euryale-v2.1", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/L3-70B-Euryale-v2.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7384417789243651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6471322811268715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21374622356495468 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42091666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5103889627659575 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-70B-Euryale-v2.1/d730a2be-1cd8-4851-9ecf-55139af1e8f7.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-70B-Euryale-v2.1/d730a2be-1cd8-4851-9ecf-55139af1e8f7.json deleted file mode 100644 index a895d5c8cf44b0927e02fab60b4c1bca24d513d2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-70B-Euryale-v2.1/d730a2be-1cd8-4851-9ecf-55139af1e8f7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sao10K_L3-70B-Euryale-v2.1/1762652579.872864", - "retrieved_timestamp": "1762652579.872865", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sao10K/L3-70B-Euryale-v2.1", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/L3-70B-Euryale-v2.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7281003293483512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6502778992745041 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22432024169184292 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41958333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5095578457446809 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Lunaris-v1/e15ed4e3-d33f-4dad-98da-e1dad098a6a1.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Lunaris-v1/e15ed4e3-d33f-4dad-98da-e1dad098a6a1.json deleted file mode 100644 index 5fc29ffb5c9f313b79d220ddde83347c27877dee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Lunaris-v1/e15ed4e3-d33f-4dad-98da-e1dad098a6a1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Lunaris-v1/1762652579.8733618", - "retrieved_timestamp": "1762652579.873365", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sao10K/L3-8B-Lunaris-v1", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/L3-8B-Lunaris-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6894573066131198 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5235299282515419 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3726666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3787400265957447 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Niitama-v1/9c10e944-3955-4478-9d07-f79769d6b884.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Niitama-v1/9c10e944-3955-4478-9d07-f79769d6b884.json deleted file mode 100644 index 48f7be488125b013c36e307884cc497e7331bf0e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Niitama-v1/9c10e944-3955-4478-9d07-f79769d6b884.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Niitama-v1/1762652579.8737721", - "retrieved_timestamp": "1762652579.873773", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sao10K/L3-8B-Niitama-v1", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/L3-8B-Niitama-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6790659893526954 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302980131787137 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3700964095744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Stheno-v3.2/85a94072-ac79-4c14-abaa-9a6424a03ab5.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Stheno-v3.2/85a94072-ac79-4c14-abaa-9a6424a03ab5.json deleted file mode 100644 index 2ea2067f9db87c6dcecee64d20516e6f651b3567..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Stheno-v3.2/85a94072-ac79-4c14-abaa-9a6424a03ab5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Sao10K_L3-8B-Stheno-v3.2/1762652579.8740559", - "retrieved_timestamp": "1762652579.874058", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sao10K/L3-8B-Stheno-v3.2", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/L3-8B-Stheno-v3.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6872841837435781 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.522778637171633 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768284574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Stheno-v3.3-32K/279b82ae-62b2-4703-85f2-1e79e42366f0.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Stheno-v3.3-32K/279b82ae-62b2-4703-85f2-1e79e42366f0.json deleted file mode 100644 index 1a9774c9ba17250534b0e97e108cd97a77be0e0d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Stheno-v3.3-32K/279b82ae-62b2-4703-85f2-1e79e42366f0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Stheno-v3.3-32K/1762652579.874314", - "retrieved_timestamp": "1762652579.874315", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sao10K/L3-8B-Stheno-v3.3-32K", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/L3-8B-Stheno-v3.3-32K" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46037181345496614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3844012923008206 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3725416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1895777925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_MN-12B-Lyra-v3/2c83813a-8254-4765-9367-efb9ad8c5e6c.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_MN-12B-Lyra-v3/2c83813a-8254-4765-9367-efb9ad8c5e6c.json deleted file mode 100644 index abcea9e76ee2582086462463fdf36f336dafedb4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_MN-12B-Lyra-v3/2c83813a-8254-4765-9367-efb9ad8c5e6c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sao10K_MN-12B-Lyra-v3/1762652579.874634", - "retrieved_timestamp": "1762652579.874634", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sao10K/MN-12B-Lyra-v3", - "developer": "Sao10K", - "inference_platform": "unknown", - "id": "Sao10K/MN-12B-Lyra-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486063644463357 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4803954360397243 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40190624999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32488364361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V1-32B/482fbdd6-6f39-4971-ac65-1e5e181b667f.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V1-32B/482fbdd6-6f39-4971-ac65-1e5e181b667f.json deleted file mode 100644 index 74486f516a5069e8121c44ba0da09776e7f1a95a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V1-32B/482fbdd6-6f39-4971-ac65-1e5e181b667f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V1-32B/1762652579.874861", - "retrieved_timestamp": "1762652579.8748622", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7971681804279312 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7000545067146033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5792885638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.76 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V2-32B/0b1758f7-4aee-40a2-b33e-f519107b6687.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V2-32B/0b1758f7-4aee-40a2-b33e-f519107b6687.json deleted file mode 100644 index 4ea1f796e6eaa40dec29d51cafa2eb1bafe947f6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V2-32B/0b1758f7-4aee-40a2-b33e-f519107b6687.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V2-32B/1762652579.875268", - "retrieved_timestamp": "1762652579.8752692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7956444456264933 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7023193256341814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41663541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5719747340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.76 - } -} 
\ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V3-32B/b206b1c9-3469-4b77-b85a-dcd3c6394c67.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V3-32B/b206b1c9-3469-4b77-b85a-dcd3c6394c67.json deleted file mode 100644 index 271439303135401cab63848f8d5aa17c05910073..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V3-32B/b206b1c9-3469-4b77-b85a-dcd3c6394c67.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V3-32B/1762652579.875521", - "retrieved_timestamp": "1762652579.8755221", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8248702332034556 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6913199237437709 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42745833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.56640625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V4-32B/52d4b2fe-cbd1-431f-b0e7-04ebfbe852ca.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V4-32B/52d4b2fe-cbd1-431f-b0e7-04ebfbe852ca.json deleted file mode 100644 index 7898ccfa68b27955d128c5b8d5a87bbcbee37ee0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V4-32B/52d4b2fe-cbd1-431f-b0e7-04ebfbe852ca.json +++ /dev/null @@ -1,107 +0,0 @@ 
-{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V4-32B/1762652579.87576", - "retrieved_timestamp": "1762652579.8757608", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7630963620970137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6920204096666581 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615771812080537 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4642604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5752160904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V5-32B/b1b0aac0-2921-44ab-ac1b-873b715e9b52.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V5-32B/b1b0aac0-2921-44ab-ac1b-873b715e9b52.json deleted file mode 100644 index c3bd55541a13bd8e64efd4ec233021009f5ed32b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V5-32B/b1b0aac0-2921-44ab-ac1b-873b715e9b52.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V5-32B/1762652579.876068", - "retrieved_timestamp": "1762652579.876069", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - 
"name": "Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7515558717536137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6928650089977083 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5460725075528701 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47086458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5762134308510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V6-32B/977a0388-5c46-42ab-bb93-91f036963f8c.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V6-32B/977a0388-5c46-42ab-bb93-91f036963f8c.json deleted file mode 100644 index 9680056367e9f48b017c3b702a5a13e1eed078ef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V6-32B/977a0388-5c46-42ab-bb93-91f036963f8c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V6-32B/1762652579.87637", - "retrieved_timestamp": "1762652579.876371", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8208985491828349 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6889783858832969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.622356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42742708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5672373670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.76 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/52438151-a1c8-440c-a9be-3670b18c1ef6.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/52438151-a1c8-440c-a9be-3670b18c1ef6.json deleted file mode 100644 index 167e6457e44b1db1c97fdca9d195789b3e151088..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/52438151-a1c8-440c-a9be-3670b18c1ef6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/1762652579.876656", - "retrieved_timestamp": "1762652579.876657", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8145786513118525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6463223196116569 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802114803625378 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4139375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45985704787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/993cc036-0e33-4d0e-b1b3-f97a9645f4c5.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/993cc036-0e33-4d0e-b1b3-f97a9645f4c5.json deleted file mode 100644 index 854f40a92ca139a42572400f4522b78ab5b6b596..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/993cc036-0e33-4d0e-b1b3-f97a9645f4c5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/1762652579.876898", - "retrieved_timestamp": "1762652579.876899", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.81420408959339 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6403963618749583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44667708333333334 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4523769946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Superb-22B/53a6fd3e-37c5-4abc-b387-0ef9f4225760.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Superb-22B/53a6fd3e-37c5-4abc-b387-0ef9f4225760.json deleted file mode 100644 index 28e55bd6c08efd9a34074fa89a9fad8acdf8090e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Superb-22B/53a6fd3e-37c5-4abc-b387-0ef9f4225760.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Superb-22B/1762652579.877154", - "retrieved_timestamp": "1762652579.877155", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6766679078179231 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5625539568927603 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3907708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871343085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Superb-27B/420f358d-c7a0-4bb5-9d0a-6c44e1f2a354.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Superb-27B/420f358d-c7a0-4bb5-9d0a-6c44e1f2a354.json deleted file mode 100644 index f9152b4a84ce274aefcc056cf4476ea3300a1844..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Superb-27B/420f358d-c7a0-4bb5-9d0a-6c44e1f2a354.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Superb-27B/1762652579.87745", - "retrieved_timestamp": "1762652579.877451", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7767601076255447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6518345685119445 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2719033232628399 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3598993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47913541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4646775265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Superb-27B/e7007251-609e-4c81-86cf-d6fb79c896c2.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Superb-27B/e7007251-609e-4c81-86cf-d6fb79c896c2.json deleted file mode 100644 index 299ea70e53d280f70b550689fffc22a58b88c5f4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Superb-27B/e7007251-609e-4c81-86cf-d6fb79c896c2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Superb-27B/1762652579.877677", - "retrieved_timestamp": "1762652579.8776782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Saxo/Linkbricks-Horizon-AI-Superb-27B", - "developer": "Saxo", - "inference_platform": "unknown", - "id": "Saxo/Linkbricks-Horizon-AI-Superb-27B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7302235845334822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6186245528925046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22205438066465258 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.465 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.406000664893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLM-7B-v2.5/7117b360-ef16-4da9-9226-b66b6aac9703.json b/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLM-7B-v2.5/7117b360-ef16-4da9-9226-b66b6aac9703.json deleted file mode 100644 index 981a8a2678d7a8f25897d4c2134c2ae7ec56f4c3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLM-7B-v2.5/7117b360-ef16-4da9-9226-b66b6aac9703.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SeaLLMs_SeaLLM-7B-v2.5/1762652579.878138", - "retrieved_timestamp": "1762652579.8781388", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SeaLLMs/SeaLLM-7B-v2.5", - "developer": "SeaLLMs", - "inference_platform": "unknown", - "id": "SeaLLMs/SeaLLM-7B-v2.5" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4521536190640833 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49802029594352754 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42032291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3203125 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLM-7B-v2/8f41a438-e9b7-43c6-b0b2-447a71ac360f.json b/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLM-7B-v2/8f41a438-e9b7-43c6-b0b2-447a71ac360f.json deleted file mode 100644 index e930772fe4d15525fac687f56d3d641eb1f4f2d6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLM-7B-v2/8f41a438-e9b7-43c6-b0b2-447a71ac360f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SeaLLMs_SeaLLM-7B-v2/1762652579.877889", - "retrieved_timestamp": "1762652579.877889", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SeaLLMs/SeaLLM-7B-v2", - "developer": "SeaLLMs", - "inference_platform": "unknown", - "id": "SeaLLMs/SeaLLM-7B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36712367629002157 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4902100795458318 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4069583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30826130319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.376 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLMs-v3-7B-Chat/f119b2b5-2303-4772-9ae0-ce8f573f86c3.json b/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLMs-v3-7B-Chat/f119b2b5-2303-4772-9ae0-ce8f573f86c3.json deleted file mode 100644 index 88b6f08c87dabf4b81b1161581179d4c2041e081..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLMs-v3-7B-Chat/f119b2b5-2303-4772-9ae0-ce8f573f86c3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SeaLLMs_SeaLLMs-v3-7B-Chat/1762652579.8783438", - "retrieved_timestamp": "1762652579.878345", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SeaLLMs/SeaLLMs-v3-7B-Chat", - "developer": "SeaLLMs", - "inference_platform": "unknown", - "id": "SeaLLMs/SeaLLMs-v3-7B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43766539448662883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5266406284595359 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.417375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3894614361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SenseLLM/SenseLLM_ReflectionCoder-CL-34B/5d7a3d90-8017-4415-a1da-eb70f6145fe4.json b/leaderboard_data/HFOpenLLMv2/SenseLLM/SenseLLM_ReflectionCoder-CL-34B/5d7a3d90-8017-4415-a1da-eb70f6145fe4.json deleted file mode 100644 index 37070d00bc7dd50de1245176b46f2c4c5f2ba988..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SenseLLM/SenseLLM_ReflectionCoder-CL-34B/5d7a3d90-8017-4415-a1da-eb70f6145fe4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SenseLLM_ReflectionCoder-CL-34B/1762652579.8785448", - "retrieved_timestamp": "1762652579.878546", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SenseLLM/ReflectionCoder-CL-34B", - "developer": "SenseLLM", - "inference_platform": "unknown", - "id": "SenseLLM/ReflectionCoder-CL-34B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4007710652180658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39529304297033296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41548958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14237034574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 33.744 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/SenseLLM/SenseLLM_ReflectionCoder-DS-33B/2ee4584d-b18c-44dd-af63-22c28b92e107.json b/leaderboard_data/HFOpenLLMv2/SenseLLM/SenseLLM_ReflectionCoder-DS-33B/2ee4584d-b18c-44dd-af63-22c28b92e107.json deleted file mode 100644 index 407b904df6be80b9ea1805b930b1d180286e3fe1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SenseLLM/SenseLLM_ReflectionCoder-DS-33B/2ee4584d-b18c-44dd-af63-22c28b92e107.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SenseLLM_ReflectionCoder-DS-33B/1762652579.878793", - "retrieved_timestamp": "1762652579.878794", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SenseLLM/ReflectionCoder-DS-33B", - "developer": "SenseLLM", - "inference_platform": "unknown", - "id": "SenseLLM/ReflectionCoder-DS-33B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3786641666334215 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3449447540164568 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3343125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12017952127659574 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 33.34 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SeppeV/SeppeV_SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/ff284b60-0c7c-4825-af77-5922831cb3b8.json b/leaderboard_data/HFOpenLLMv2/SeppeV/SeppeV_SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/ff284b60-0c7c-4825-af77-5922831cb3b8.json deleted file mode 100644 index 6a6f303319436c009ede4e9e20be39e555c96513..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SeppeV/SeppeV_SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/ff284b60-0c7c-4825-af77-5922831cb3b8.json +++ /dev/null @@ 
-1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SeppeV_SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/1762652579.879464", - "retrieved_timestamp": "1762652579.8794649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo", - "developer": "SeppeV", - "inference_platform": "unknown", - "id": "SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09554648333089535 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3072665948660797 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40320833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11610704787234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sharathhebbar24/Sharathhebbar24_SSH_355M/9ff82d83-2a89-48d8-8ad0-91637a77bc76.json b/leaderboard_data/HFOpenLLMv2/Sharathhebbar24/Sharathhebbar24_SSH_355M/9ff82d83-2a89-48d8-8ad0-91637a77bc76.json deleted file mode 100644 index 7bd9dda272077e34a48128242da02b94ceb3732b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sharathhebbar24/Sharathhebbar24_SSH_355M/9ff82d83-2a89-48d8-8ad0-91637a77bc76.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sharathhebbar24_SSH_355M/1762652579.8797262", - "retrieved_timestamp": "1762652579.8797271", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sharathhebbar24/SSH_355M", - "developer": "Sharathhebbar24", - "inference_platform": "unknown", - "id": "Sharathhebbar24/SSH_355M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1423589409433636 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30985907344593705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11760305851063829 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.355 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Shreyash2010/Shreyash2010_Uma-4x4B-Instruct-v0.1/83fa529b-8c61-4017-92a8-ec0f46eb7bba.json b/leaderboard_data/HFOpenLLMv2/Shreyash2010/Shreyash2010_Uma-4x4B-Instruct-v0.1/83fa529b-8c61-4017-92a8-ec0f46eb7bba.json deleted file mode 100644 index f384db25b91f77742e3538fb4370cb0f265627ea..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Shreyash2010/Shreyash2010_Uma-4x4B-Instruct-v0.1/83fa529b-8c61-4017-92a8-ec0f46eb7bba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Shreyash2010_Uma-4x4B-Instruct-v0.1/1762652579.880244", - "retrieved_timestamp": "1762652579.880245", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Shreyash2010/Uma-4x4B-Instruct-v0.1", - "developer": "Shreyash2010", - "inference_platform": "unknown", - "id": "Shreyash2010/Uma-4x4B-Instruct-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5516961661724225 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5511602059856503 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4441041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.386968085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sicarius-Prototyping/Sicarius-Prototyping_Micropenis_1B/1ce9038a-7f1f-4b79-9fbc-9e78660094b3.json b/leaderboard_data/HFOpenLLMv2/Sicarius-Prototyping/Sicarius-Prototyping_Micropenis_1B/1ce9038a-7f1f-4b79-9fbc-9e78660094b3.json deleted file mode 100644 index b830d4d2bfa55eedc0a0a6d0a11f8348e6b87ac6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sicarius-Prototyping/Sicarius-Prototyping_Micropenis_1B/1ce9038a-7f1f-4b79-9fbc-9e78660094b3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sicarius-Prototyping_Micropenis_1B/1762652579.8808", - "retrieved_timestamp": "1762652579.880801", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sicarius-Prototyping/Micropenis_1B", - "developer": "Sicarius-Prototyping", - "inference_platform": "unknown", - "id": "Sicarius-Prototyping/Micropenis_1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3460662154195313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3372377910880025 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3325416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18600398936170212 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.618 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sicarius-Prototyping/Sicarius-Prototyping_bacon_and_food/af3374c8-5a23-4a87-990b-123803107ed8.json b/leaderboard_data/HFOpenLLMv2/Sicarius-Prototyping/Sicarius-Prototyping_bacon_and_food/af3374c8-5a23-4a87-990b-123803107ed8.json deleted file mode 100644 index 82d0c6e359ea479997f5eccd49cf58f47704e1ed..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sicarius-Prototyping/Sicarius-Prototyping_bacon_and_food/af3374c8-5a23-4a87-990b-123803107ed8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sicarius-Prototyping_bacon_and_food/1762652579.881054", - "retrieved_timestamp": "1762652579.881054", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sicarius-Prototyping/bacon_and_food", - "developer": "Sicarius-Prototyping", - "inference_platform": "unknown", - "id": "Sicarius-Prototyping/bacon_and_food" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5860428108529812 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47245798883729967 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3883854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", 
- "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3262965425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_2B-ad/31fd60ef-db8f-4785-b486-7a06f1cdf981.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_2B-ad/31fd60ef-db8f-4785-b486-7a06f1cdf981.json deleted file mode 100644 index 93074dbe1d72b139e6557ff1a84f7a6e9ae9b370..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_2B-ad/31fd60ef-db8f-4785-b486-7a06f1cdf981.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_2B-ad/1762652579.88126", - "retrieved_timestamp": "1762652579.881261", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/2B-ad", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/2B-ad" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4378903531518593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40922431523996955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40153124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2662067819148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 3.204 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_2B_or_not_2B/983cf552-1ab1-49ba-aab0-1e644e9a7acb.json 
b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_2B_or_not_2B/983cf552-1ab1-49ba-aab0-1e644e9a7acb.json deleted file mode 100644 index 1f77cfec68266b9b68eb8236c12e3b1e1afdd0a8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_2B_or_not_2B/983cf552-1ab1-49ba-aab0-1e644e9a7acb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_2B_or_not_2B/1762652579.881506", - "retrieved_timestamp": "1762652579.881506", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/2B_or_not_2B", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/2B_or_not_2B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2062316874781136 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3415917024092019 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3790833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13987699468085107 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Dusk_Rainbow/e8f1d0e1-4086-4645-983b-b9470a22b522.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Dusk_Rainbow/e8f1d0e1-4086-4645-983b-b9470a22b522.json deleted file mode 100644 index 8b6937a6fcaae8d1108b0387acfb67bd1fb8c83c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Dusk_Rainbow/e8f1d0e1-4086-4645-983b-b9470a22b522.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Dusk_Rainbow/1762652579.881711", - 
"retrieved_timestamp": "1762652579.8817122", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/Dusk_Rainbow", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Dusk_Rainbow" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3588057465303173 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47717504280736184 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40252083333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3443317819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Eximius_Persona_5B/98406fba-a2e4-4afd-a121-e33a723d2eb6.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Eximius_Persona_5B/98406fba-a2e4-4afd-a121-e33a723d2eb6.json deleted file mode 100644 index b931ebf82e359e8de97eb437e4c45cd8bb66ce1b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Eximius_Persona_5B/98406fba-a2e4-4afd-a121-e33a723d2eb6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Eximius_Persona_5B/1762652579.881908", - "retrieved_timestamp": "1762652579.881909", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/Eximius_Persona_5B", - "developer": "SicariusSicariiStuff", - 
"inference_platform": "unknown", - "id": "SicariusSicariiStuff/Eximius_Persona_5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6559850086658954 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4511736018571028 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38181249999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31399601063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 5.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Impish_Mind_8B/3a0633f1-070a-416d-a7ab-f41dd44f577d.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Impish_Mind_8B/3a0633f1-070a-416d-a7ab-f41dd44f577d.json deleted file mode 100644 index d208466488becaa6694cd18f01725abeb8607cd1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Impish_Mind_8B/3a0633f1-070a-416d-a7ab-f41dd44f577d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_Mind_8B/1762652579.8823712", - "retrieved_timestamp": "1762652579.8823712", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/Impish_Mind_8B", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Impish_Mind_8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31791424531354584 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46736571616627115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4069583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3308676861702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Redemption_Wind_24B/21216e0b-dc97-4502-ba3d-d47ad1ac73b2.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Redemption_Wind_24B/21216e0b-dc97-4502-ba3d-d47ad1ac73b2.json deleted file mode 100644 index a06b7f3e47a05c8fe15a443cf4e1a40e4c43e200..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Redemption_Wind_24B/21216e0b-dc97-4502-ba3d-d47ad1ac73b2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Redemption_Wind_24B/1762652579.8843782", - "retrieved_timestamp": "1762652579.884379", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/Redemption_Wind_24B", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Redemption_Wind_24B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25014517037017336 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.642816406969129 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4262395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.543218085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Winged_Imp_8B/dd1936aa-9b21-466d-b74a-807fafd9f24a.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Winged_Imp_8B/dd1936aa-9b21-466d-b74a-807fafd9f24a.json deleted file mode 100644 index e6afc71c95f33ea931ddc0004eaa68dd5ee2019c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Winged_Imp_8B/dd1936aa-9b21-466d-b74a-807fafd9f24a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Winged_Imp_8B/1762652579.8845959", - "retrieved_timestamp": "1762652579.884597", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/Winged_Imp_8B", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Winged_Imp_8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.743012983328679 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5120376322048542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41483333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3638630319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Wingless_Imp_8B/2304646d-a399-40c0-8577-0bab9ad2ff3c.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Wingless_Imp_8B/2304646d-a399-40c0-8577-0bab9ad2ff3c.json deleted file mode 100644 index 5c5fa33b2fd2075cb30c2de3a084e7c7bf7c04aa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Wingless_Imp_8B/2304646d-a399-40c0-8577-0bab9ad2ff3c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Wingless_Imp_8B/1762652579.8848069", - "retrieved_timestamp": "1762652579.8848078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/Wingless_Imp_8B", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Wingless_Imp_8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.743012983328679 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5120376322048542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41483333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3638630319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Zion_Alpha/9d6d36b1-f8ad-4cc8-b904-c7e3b0a923e4.json 
b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Zion_Alpha/9d6d36b1-f8ad-4cc8-b904-c7e3b0a923e4.json deleted file mode 100644 index c60e4aa61831acf00bf21e6aa7177cffa2a5f43c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Zion_Alpha/9d6d36b1-f8ad-4cc8-b904-c7e3b0a923e4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Zion_Alpha/1762652579.885025", - "retrieved_timestamp": "1762652579.885026", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/Zion_Alpha", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Zion_Alpha" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3324024698910003 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49321099934509743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4726875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31316489361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_dn_ep02/f7f3caa2-0468-4dfb-a817-bb5cdc977911.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_dn_ep02/f7f3caa2-0468-4dfb-a817-bb5cdc977911.json deleted file mode 100644 index 8d1843bfb319c2fb1b6132263901297d6c7a6211..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_dn_ep02/f7f3caa2-0468-4dfb-a817-bb5cdc977911.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_dn_ep02/1762652579.885246", - "retrieved_timestamp": "1762652579.885247", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/dn_ep02", - "developer": "SicariusSicariiStuff", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/dn_ep02" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5064340394597445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5266008759836228 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1419939577039275 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43163541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39976728723404253 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-3B-Instruct/bdcf5d38-55d2-4f55-8bd1-7f4cd94f758c.json b/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-3B-Instruct/bdcf5d38-55d2-4f55-8bd1-7f4cd94f758c.json deleted file mode 100644 index c224bff8184e5c155aab7652e37c43c9df24f58b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-3B-Instruct/bdcf5d38-55d2-4f55-8bd1-7f4cd94f758c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Qwen2.5-3B-Instruct/1762652579.887695", - "retrieved_timestamp": "1762652579.8876958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct", - "developer": "SkyOrbis", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3534100630770799 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4264821228336018 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40236458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28116688829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/7875e792-80dd-4fa8-9743-b8ef42a4cdb7.json b/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/7875e792-80dd-4fa8-9743-b8ef42a4cdb7.json deleted file mode 100644 index 5c393fbac5cbab379c51e61e941d14af198f68c7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/7875e792-80dd-4fa8-9743-b8ef42a4cdb7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/1762652579.888021", - "retrieved_timestamp": "1762652579.888022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000", - "developer": "SkyOrbis", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38188672721711725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.5077962006048589 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1865558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44360416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3913730053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/9354b915-68cd-47ca-a1e8-7481a8b33c49.json b/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/9354b915-68cd-47ca-a1e8-7481a8b33c49.json deleted file mode 100644 index 1fc7e1e52c5a4f2c922518f92a76e855058ad44a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/9354b915-68cd-47ca-a1e8-7481a8b33c49.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/1762652579.8882601", - "retrieved_timestamp": "1762652579.888261", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000", - "developer": "SkyOrbis", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3812373391490135 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5389864554242366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20996978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4237916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42378656914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Solshine/Solshine_Brimful-merged-replete/6523a08c-7a43-4784-9650-e1d5144fcfcf.json b/leaderboard_data/HFOpenLLMv2/Solshine/Solshine_Brimful-merged-replete/6523a08c-7a43-4784-9650-e1d5144fcfcf.json deleted file mode 100644 index 6a47de4bb8fd01da93a6dd145e5831d38ac01621..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Solshine/Solshine_Brimful-merged-replete/6523a08c-7a43-4784-9650-e1d5144fcfcf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Solshine_Brimful-merged-replete/1762652579.8890932", - "retrieved_timestamp": "1762652579.8890939", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Solshine/Brimful-merged-replete", - "developer": "Solshine", - "inference_platform": "unknown", - "id": "Solshine/Brimful-merged-replete" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17605619755581856 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28834447696551024 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.342125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.10846077127659574 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 12.277 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sourjayon/Sourjayon_DeepSeek-R1-8b-Sify/55a6c2c7-d29e-43a2-abd6-435117967a5d.json b/leaderboard_data/HFOpenLLMv2/Sourjayon/Sourjayon_DeepSeek-R1-8b-Sify/55a6c2c7-d29e-43a2-abd6-435117967a5d.json deleted file mode 100644 index 22ae550c02714e7d5ff54a609252841ccce25b4a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Sourjayon/Sourjayon_DeepSeek-R1-8b-Sify/55a6c2c7-d29e-43a2-abd6-435117967a5d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sourjayon_DeepSeek-R1-8b-Sify/1762652579.89035", - "retrieved_timestamp": "1762652579.890351", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sourjayon/DeepSeek-R1-8b-Sify", - "developer": "Sourjayon", - "inference_platform": "unknown", - "id": "Sourjayon/DeepSeek-R1-8b-Sify" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3679481553389451 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33793580116642347 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24471299093655588 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3303125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19805518617021275 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Sourjayon/Sourjayon_DeepSeek-R1-ForumNXT/101d8dec-2e39-47d1-b76d-d91d6562feff.json b/leaderboard_data/HFOpenLLMv2/Sourjayon/Sourjayon_DeepSeek-R1-ForumNXT/101d8dec-2e39-47d1-b76d-d91d6562feff.json deleted file mode 100644 index 74cf27e5d7378a245dcf4ea95c8db491e7dd0b4c..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/Sourjayon/Sourjayon_DeepSeek-R1-ForumNXT/101d8dec-2e39-47d1-b76d-d91d6562feff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sourjayon_DeepSeek-R1-ForumNXT/1762652579.890614", - "retrieved_timestamp": "1762652579.890615", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sourjayon/DeepSeek-R1-ForumNXT", - "developer": "Sourjayon", - "inference_platform": "unknown", - "id": "Sourjayon/DeepSeek-R1-ForumNXT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26028714920854445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3310198487331462 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25755287009063443 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3392395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16481050531914893 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SpaceYL/SpaceYL_ECE_Poirot/32feb55a-fde5-4bbd-b93e-abffc1a7e573.json b/leaderboard_data/HFOpenLLMv2/SpaceYL/SpaceYL_ECE_Poirot/32feb55a-fde5-4bbd-b93e-abffc1a7e573.json deleted file mode 100644 index f03de3e07ec1ef8916adda435e2f37fdd0a844bb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SpaceYL/SpaceYL_ECE_Poirot/32feb55a-fde5-4bbd-b93e-abffc1a7e573.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SpaceYL_ECE_Poirot/1762652579.890822", - "retrieved_timestamp": "1762652579.890822", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" 
- }, - "model_info": { - "name": "SpaceYL/ECE_Poirot", - "developer": "SpaceYL", - "inference_platform": "unknown", - "id": "SpaceYL/ECE_Poirot" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3106956209524063 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42622349736626014 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667674 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40264583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2883144946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Athena-1-3B/29d6834e-38f7-472f-86be-79a8fce03989.json b/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Athena-1-3B/29d6834e-38f7-472f-86be-79a8fce03989.json deleted file mode 100644 index 306d6037cfd8c2e371a87fc7a2de07db9530fb5e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Athena-1-3B/29d6834e-38f7-472f-86be-79a8fce03989.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Spestly_Athena-1-3B/1762652579.8910668", - "retrieved_timestamp": "1762652579.891068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Spestly/Athena-1-3B", - "developer": "Spestly", - "inference_platform": "unknown", - "id": "Spestly/Athena-1-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5569167586448401 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.47015477265388084 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23791540785498488 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43622916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35189494680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Atlas-Pro-1.5B-Preview/8282705f-6b69-40c2-825d-8e0c72756083.json b/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Atlas-Pro-1.5B-Preview/8282705f-6b69-40c2-825d-8e0c72756083.json deleted file mode 100644 index b65bf5e42cc2cafaee931bb5bdd676f633f968c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Atlas-Pro-1.5B-Preview/8282705f-6b69-40c2-825d-8e0c72756083.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Spestly_Atlas-Pro-1.5B-Preview/1762652579.891309", - "retrieved_timestamp": "1762652579.89131", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Spestly/Atlas-Pro-1.5B-Preview", - "developer": "Spestly", - "inference_platform": "unknown", - "id": "Spestly/Atlas-Pro-1.5B-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2429509257658568 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.349893585329524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31948640483383683 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3354270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1924867021276596 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Atlas-Pro-7B-Preview/57a36976-0868-462e-ab57-3addef7ea2f9.json b/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Atlas-Pro-7B-Preview/57a36976-0868-462e-ab57-3addef7ea2f9.json deleted file mode 100644 index 55a5565d004e7abe17fed6dc678d99e6370ab6d0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Atlas-Pro-7B-Preview/57a36976-0868-462e-ab57-3addef7ea2f9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Spestly_Atlas-Pro-7B-Preview/1762652579.891519", - "retrieved_timestamp": "1762652579.89152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Spestly/Atlas-Pro-7B-Preview", - "developer": "Spestly", - "inference_platform": "unknown", - "id": "Spestly/Atlas-Pro-7B-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31541642840995227 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46679203304308553 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5083081570996979 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2970412234042553 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} 
\ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_GutenLaserPi/e418f7d1-8fd6-44ea-bc33-62fb525589f1.json b/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_GutenLaserPi/e418f7d1-8fd6-44ea-bc33-62fb525589f1.json deleted file mode 100644 index ef3a16d51efacb419c243818576e596b68c5447e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_GutenLaserPi/e418f7d1-8fd6-44ea-bc33-62fb525589f1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Stark2008_GutenLaserPi/1762652579.891723", - "retrieved_timestamp": "1762652579.891723", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Stark2008/GutenLaserPi", - "developer": "Stark2008", - "inference_platform": "unknown", - "id": "Stark2008/GutenLaserPi" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42265300513747966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5212342482489518 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31058843085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_LayleleFlamPi/c12a519e-9d34-4671-8e98-c69178e08ec0.json b/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_LayleleFlamPi/c12a519e-9d34-4671-8e98-c69178e08ec0.json deleted file mode 100644 index eea5e7327080b40d29df05de15fc66aa3c7eef20..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_LayleleFlamPi/c12a519e-9d34-4671-8e98-c69178e08ec0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Stark2008_LayleleFlamPi/1762652579.8919628", - 
"retrieved_timestamp": "1762652579.891964", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Stark2008/LayleleFlamPi", - "developer": "Stark2008", - "inference_platform": "unknown", - "id": "Stark2008/LayleleFlamPi" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42842325030917966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5115654142581095 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46084375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3093417553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_VisFlamCat/ed5f857e-6799-4729-a2e5-afbea4b89ecd.json b/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_VisFlamCat/ed5f857e-6799-4729-a2e5-afbea4b89ecd.json deleted file mode 100644 index 110ddf110e1c8eddf795ca7decfce9f4d1a3916a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_VisFlamCat/ed5f857e-6799-4729-a2e5-afbea4b89ecd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Stark2008_VisFlamCat/1762652579.892166", - "retrieved_timestamp": "1762652579.892166", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Stark2008/VisFlamCat", - "developer": "Stark2008", - "inference_platform": "unknown", - "id": "Stark2008/VisFlamCat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43659157701565177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5216957865099948 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44627083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31441156914893614 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Steelskull/Steelskull_L3.3-MS-Nevoria-70b/5db5f87b-9bb0-4d29-b578-72bb896f3359.json b/leaderboard_data/HFOpenLLMv2/Steelskull/Steelskull_L3.3-MS-Nevoria-70b/5db5f87b-9bb0-4d29-b578-72bb896f3359.json deleted file mode 100644 index 9ed5f258190e6c4b43b134b70f0fc752df991556..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Steelskull/Steelskull_L3.3-MS-Nevoria-70b/5db5f87b-9bb0-4d29-b578-72bb896f3359.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Steelskull_L3.3-MS-Nevoria-70b/1762652579.8924139", - "retrieved_timestamp": "1762652579.892415", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Steelskull/L3.3-MS-Nevoria-70b", - "developer": "Steelskull", - "inference_platform": "unknown", - "id": "Steelskull/L3.3-MS-Nevoria-70b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6963268571833845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6997536580025828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3957703927492447 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47063758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4682291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5535239361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Steelskull/Steelskull_L3.3-Nevoria-R1-70b/1465ebc9-f2c3-46df-b5e1-37e7a027fde8.json b/leaderboard_data/HFOpenLLMv2/Steelskull/Steelskull_L3.3-Nevoria-R1-70b/1465ebc9-f2c3-46df-b5e1-37e7a027fde8.json deleted file mode 100644 index ea033a79a766e63624658b69fb3118c469253925..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Steelskull/Steelskull_L3.3-Nevoria-R1-70b/1465ebc9-f2c3-46df-b5e1-37e7a027fde8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Steelskull_L3.3-Nevoria-R1-70b/1762652579.892649", - "retrieved_timestamp": "1762652579.89265", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Steelskull/L3.3-Nevoria-R1-70b", - "developer": "Steelskull", - "inference_platform": "unknown", - "id": "Steelskull/L3.3-Nevoria-R1-70b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6023794642659255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6971668662651651 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46895973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.47753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5462932180851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/StelleX/StelleX_Vorisatex-7B-preview/875156be-2ff9-4ec4-8085-27f22fb19259.json b/leaderboard_data/HFOpenLLMv2/StelleX/StelleX_Vorisatex-7B-preview/875156be-2ff9-4ec4-8085-27f22fb19259.json deleted file mode 100644 index a66bd76cdd1cbfe8a4b74de6fff078ee2ef16fe0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/StelleX/StelleX_Vorisatex-7B-preview/875156be-2ff9-4ec4-8085-27f22fb19259.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/StelleX_Vorisatex-7B-preview/1762652579.893095", - "retrieved_timestamp": "1762652579.893096", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "StelleX/Vorisatex-7B-preview", - "developer": "StelleX", - "inference_platform": "unknown", - "id": "StelleX/Vorisatex-7B-preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1515013497519914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3111695757290421 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41923958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11660571808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-Instruct/1b0bd686-fd26-441f-b280-97b10bb1449c.json 
b/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-Instruct/1b0bd686-fd26-441f-b280-97b10bb1449c.json deleted file mode 100644 index 83c7ff8de80cd87232494b0b34f5cc3510d2d4d2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-Instruct/1b0bd686-fd26-441f-b280-97b10bb1449c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SultanR_SmolTulu-1.7b-Instruct/1762652579.893334", - "retrieved_timestamp": "1762652579.893334", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SultanR/SmolTulu-1.7b-Instruct", - "developer": "SultanR", - "inference_platform": "unknown", - "id": "SultanR/SmolTulu-1.7b-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6540867121459949 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713086260572204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17104388297872342 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-Reinforced/224b4cbc-e36c-4f68-9918-edbdaf947191.json b/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-Reinforced/224b4cbc-e36c-4f68-9918-edbdaf947191.json deleted file mode 100644 index b559221a06efe4f319389da0b1e64d995186a2d9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-Reinforced/224b4cbc-e36c-4f68-9918-edbdaf947191.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SultanR_SmolTulu-1.7b-Reinforced/1762652579.893585", - "retrieved_timestamp": "1762652579.893586", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SultanR/SmolTulu-1.7b-Reinforced", - "developer": "SultanR", - "inference_platform": "unknown", - "id": "SultanR/SmolTulu-1.7b-Reinforced" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6790659893526954 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3551868188444029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17627992021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-it-v0/22ea218a-e3be-4e05-9a94-af716bb3a624.json b/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-it-v0/22ea218a-e3be-4e05-9a94-af716bb3a624.json deleted file mode 100644 index f60ab4a0e241f6cfe9976e5706d03ab491078cfe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-it-v0/22ea218a-e3be-4e05-9a94-af716bb3a624.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SultanR_SmolTulu-1.7b-it-v0/1762652579.8938031", - "retrieved_timestamp": "1762652579.8938031", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SultanR/SmolTulu-1.7b-it-v0", - "developer": "SultanR", - "inference_platform": "unknown", - "id": "SultanR/SmolTulu-1.7b-it-v0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6540867121459949 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713086260572204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17104388297872342 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBA-123/a469604f-f755-46e0-8b1c-db4a365dec34.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBA-123/a469604f-f755-46e0-8b1c-db4a365dec34.json deleted file mode 100644 index e276ae9a6400cb11a4b49d6561eb4032e10ffa34..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBA-123/a469604f-f755-46e0-8b1c-db4a365dec34.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Supichi_BBA-123/1762652579.894015", - "retrieved_timestamp": "1762652579.894016", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Supichi/BBA-123", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/BBA-123" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2079548930171944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2920111436321769 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34990625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11668882978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 17.161 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBA99/fa793cb5-5522-4777-8d6f-e4719a51f767.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBA99/fa793cb5-5522-4777-8d6f-e4719a51f767.json deleted file mode 100644 index 5d13813d0abeb69a0acdfda4dc7fc4acbf9b4026..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBA99/fa793cb5-5522-4777-8d6f-e4719a51f767.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Supichi_BBA99/1762652579.8942661", - "retrieved_timestamp": "1762652579.8942661", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Supichi/BBA99", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/BBA99" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14066011516110588 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2768958340020912 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32184375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.11120345744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 17.161 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAIK29/de5f2ab9-f1d2-49bc-9771-41b9da1bdfa3.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAIK29/de5f2ab9-f1d2-49bc-9771-41b9da1bdfa3.json deleted file mode 100644 index 59abdbd1db426a1b3f07ea6420672c42c3b23d20..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAIK29/de5f2ab9-f1d2-49bc-9771-41b9da1bdfa3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Supichi_BBAIK29/1762652579.89447", - "retrieved_timestamp": "1762652579.894471", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Supichi/BBAIK29", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/BBAIK29" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45884807865352817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5589641249478369 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3678247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45008333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4468916223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_250_Xia0_gZ/068a06f4-3fdc-495f-b7e4-0effebe24e42.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_250_Xia0_gZ/068a06f4-3fdc-495f-b7e4-0effebe24e42.json deleted file mode 100644 index 2fa4d38cd01ac80dbf957ad41e561e65701ebe0b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_250_Xia0_gZ/068a06f4-3fdc-495f-b7e4-0effebe24e42.json +++ /dev/null @@ -1,107 +0,0 @@ 
-{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Supichi_BBAI_250_Xia0_gZ/1762652579.894933", - "retrieved_timestamp": "1762652579.894933", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Supichi/BBAI_250_Xia0_gZ", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/BBAI_250_Xia0_gZ" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4685401401614383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5567682997527722 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640483383685801 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4579270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4464760638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_275_Tsunami_gZ/173028b9-03e3-44d7-a7e9-2c0c5c6f4b4e.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_275_Tsunami_gZ/173028b9-03e3-44d7-a7e9-2c0c5c6f4b4e.json deleted file mode 100644 index aad977057fc5952a3cf473dcbc42fb4ef324caf2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_275_Tsunami_gZ/173028b9-03e3-44d7-a7e9-2c0c5c6f4b4e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Supichi_BBAI_275_Tsunami_gZ/1762652579.895135", - "retrieved_timestamp": "1762652579.895135", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Supichi/BBAI_275_Tsunami_gZ", - "developer": "Supichi", - "inference_platform": "unknown", - "id": 
"Supichi/BBAI_275_Tsunami_gZ" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5369586031729146 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5531259476127334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3285498489425982 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44478124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44921875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_525_Tsu_gZ_Xia0/6b6b273e-9cf0-405e-b1e4-5fdbd2ae16d9.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_525_Tsu_gZ_Xia0/6b6b273e-9cf0-405e-b1e4-5fdbd2ae16d9.json deleted file mode 100644 index 3baa32beb06e881fd252be60f888f7b36e7818aa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_525_Tsu_gZ_Xia0/6b6b273e-9cf0-405e-b1e4-5fdbd2ae16d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Supichi_BBAI_525_Tsu_gZ_Xia0/1762652579.8953412", - "retrieved_timestamp": "1762652579.8953412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Supichi/BBAI_525_Tsu_gZ_Xia0", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/BBAI_525_Tsu_gZ_Xia0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5338612658856279 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5561933633430705 - } - }, - { - "evaluation_name": "MATH Level 5", 
- "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3429003021148036 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44744791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44772273936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_78B_Calme_3_1_Ties/a9c4a482-6b02-4cf6-a7d5-3e16334df634.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_78B_Calme_3_1_Ties/a9c4a482-6b02-4cf6-a7d5-3e16334df634.json deleted file mode 100644 index 68c62d73ac2c6d5f8f6f014a0823b705cf6003af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_78B_Calme_3_1_Ties/a9c4a482-6b02-4cf6-a7d5-3e16334df634.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Supichi_BBAI_78B_Calme_3_1_Ties/1762652579.895541", - "retrieved_timestamp": "1762652579.895541", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Supichi/BBAI_78B_Calme_3_1_Ties", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/BBAI_78B_Calme_3_1_Ties" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18280052482967415 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28281264175951776 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22902684563758388 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30996874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11436170212765957 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 27.06 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_QWEEN_V000000_LUMEN_14B/57fd3fdc-dfdd-44ee-8c30-dc5ce4a0df8d.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_QWEEN_V000000_LUMEN_14B/57fd3fdc-dfdd-44ee-8c30-dc5ce4a0df8d.json deleted file mode 100644 index 7fd74fb8160b747e706f832479f0a117bb583d0a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_QWEEN_V000000_LUMEN_14B/57fd3fdc-dfdd-44ee-8c30-dc5ce4a0df8d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Supichi_BBAI_QWEEN_V000000_LUMEN_14B/1762652579.895749", - "retrieved_timestamp": "1762652579.8957498", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Supichi/BBAI_QWEEN_V000000_LUMEN_14B", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/BBAI_QWEEN_V000000_LUMEN_14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18145188100905596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22972580681005383 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23154362416107382 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3445416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11602393617021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 10.366 - } -} \ No newline at end of 
file diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_HF_TOKEN/cd0ccaff-e1b3-4c11-a8a0-37137d0386e2.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_HF_TOKEN/cd0ccaff-e1b3-4c11-a8a0-37137d0386e2.json deleted file mode 100644 index 40c0d35dff5b1a20ae66812b58ef19308447d255..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_HF_TOKEN/cd0ccaff-e1b3-4c11-a8a0-37137d0386e2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Supichi_HF_TOKEN/1762652579.895958", - "retrieved_timestamp": "1762652579.895958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Supichi/HF_TOKEN", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/HF_TOKEN" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1379872072766925 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2763924734767205 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32717708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11095412234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 17.161 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_NJS26/f336c7ee-2275-4045-a227-1a7abbaebf63.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_NJS26/f336c7ee-2275-4045-a227-1a7abbaebf63.json deleted file mode 100644 index 9f4e574f6fdc2e13f920f7c2dd76ce41dddf8a00..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_NJS26/f336c7ee-2275-4045-a227-1a7abbaebf63.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Supichi_NJS26/1762652579.8961651", - "retrieved_timestamp": "1762652579.8961651", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Supichi/NJS26", - "developer": "Supichi", - "inference_platform": "unknown", - "id": "Supichi/NJS26" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04481331755298164 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4780152929488641 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036901595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Svak/Svak_MN-12B-Inferor-v0.0/5bb52ed5-e59a-4e60-a6eb-9e9322d95ccc.json b/leaderboard_data/HFOpenLLMv2/Svak/Svak_MN-12B-Inferor-v0.0/5bb52ed5-e59a-4e60-a6eb-9e9322d95ccc.json deleted file mode 100644 index f8e837b584b6ded45f933108873b36f73e82af88..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Svak/Svak_MN-12B-Inferor-v0.0/5bb52ed5-e59a-4e60-a6eb-9e9322d95ccc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Svak_MN-12B-Inferor-v0.0/1762652579.896373", - "retrieved_timestamp": "1762652579.896374", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Svak/MN-12B-Inferor-v0.0", - "developer": "Svak", - "inference_platform": "unknown", - "id": "Svak/MN-12B-Inferor-v0.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5707555951541909 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5195010930589931 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46388541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3558843085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Svak/Svak_MN-12B-Inferor-v0.1/9bfe838e-a568-4933-b03d-3e9ae6d2026d.json b/leaderboard_data/HFOpenLLMv2/Svak/Svak_MN-12B-Inferor-v0.1/9bfe838e-a568-4933-b03d-3e9ae6d2026d.json deleted file mode 100644 index 17d95df798a8f6d7901266b262271c61689040f8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Svak/Svak_MN-12B-Inferor-v0.1/9bfe838e-a568-4933-b03d-3e9ae6d2026d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Svak_MN-12B-Inferor-v0.1/1762652579.8966348", - "retrieved_timestamp": "1762652579.896636", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Svak/MN-12B-Inferor-v0.1", - "developer": "Svak", - "inference_platform": "unknown", - "id": "Svak/MN-12B-Inferor-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6346527214457639 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5146762089838804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4350833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3661901595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Syed-Hasan-8503/Syed-Hasan-8503_Phi-3-mini-4K-instruct-cpo-simpo/58bacacb-2936-4685-b0ba-dc8f47f3232a.json b/leaderboard_data/HFOpenLLMv2/Syed-Hasan-8503/Syed-Hasan-8503_Phi-3-mini-4K-instruct-cpo-simpo/58bacacb-2936-4685-b0ba-dc8f47f3232a.json deleted file mode 100644 index 689bfdbd01d4b7882022fd0cd6680c3b0e905e72..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Syed-Hasan-8503/Syed-Hasan-8503_Phi-3-mini-4K-instruct-cpo-simpo/58bacacb-2936-4685-b0ba-dc8f47f3232a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Syed-Hasan-8503_Phi-3-mini-4K-instruct-cpo-simpo/1762652579.896852", - "retrieved_timestamp": "1762652579.896853", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo", - "developer": "Syed-Hasan-8503", - "inference_platform": "unknown", - "id": "Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5714049832222946 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5681534123661078 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15709969788519637 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3963541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38605385638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P1/5bedfdac-2976-4a21-9ae2-a5b5b06e1e14.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P1/5bedfdac-2976-4a21-9ae2-a5b5b06e1e14.json deleted file mode 100644 index 2e6246333c67893c7c3b8fab9492965b99e7331a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P1/5bedfdac-2976-4a21-9ae2-a5b5b06e1e14.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V1-P1/1762652579.897121", - "retrieved_timestamp": "1762652579.8971221", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/KRONOS-8B-V1-P1", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V1-P1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7849783020164276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.508544756293663 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19788519637462235 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3881041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3759973404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P2/a5d0fc39-cac5-409f-8375-636ef97fba8c.json 
b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P2/a5d0fc39-cac5-409f-8375-636ef97fba8c.json deleted file mode 100644 index 87d83ff5a170f899b5de5d0d388acfe688c8b5f2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P2/a5d0fc39-cac5-409f-8375-636ef97fba8c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V1-P2/1762652579.897378", - "retrieved_timestamp": "1762652579.8973792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/KRONOS-8B-V1-P2", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V1-P2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6724213974476612 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47717566218002166 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3453291223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P3/14eb1867-80a0-47f9-9b2a-f0a05f683fb4.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P3/14eb1867-80a0-47f9-9b2a-f0a05f683fb4.json deleted file mode 100644 index 4d0af8ffd6f593da390111228951bb47a785ef13..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P3/14eb1867-80a0-47f9-9b2a-f0a05f683fb4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V1-P3/1762652579.897578", - "retrieved_timestamp": "1762652579.897579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open 
LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/KRONOS-8B-V1-P3", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V1-P3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7137373280673058 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5127875870036823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34050864361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V2/ff4c64ec-f44b-4bec-9534-bafa632a0e3f.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V2/ff4c64ec-f44b-4bec-9534-bafa632a0e3f.json deleted file mode 100644 index 446292f96d5bed2d2e1f8262baf49a4429114306..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V2/ff4c64ec-f44b-4bec-9534-bafa632a0e3f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V2/1762652579.897814", - "retrieved_timestamp": "1762652579.897815", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/KRONOS-8B-V2", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5180243974875552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.513268555595521 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22658610271903323 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38286458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3737533244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V3/fc5613f1-09bc-4b82-89f4-4ee671cad5bf.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V3/fc5613f1-09bc-4b82-89f4-4ee671cad5bf.json deleted file mode 100644 index 4aa1a5c6cd1c60d1bcea75d465b44603affd4af0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V3/fc5613f1-09bc-4b82-89f4-4ee671cad5bf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V3/1762652579.8980181", - "retrieved_timestamp": "1762652579.898019", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/KRONOS-8B-V3", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5474751437297483 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.511865544689898 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - 
} - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3922291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3738364361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V4/af8665b4-d9be-4243-9c8d-0b43e7abd540.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V4/af8665b4-d9be-4243-9c8d-0b43e7abd540.json deleted file mode 100644 index 58b32f6ddeb75646f9d1f6049280ad469a24af4e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V4/af8665b4-d9be-4243-9c8d-0b43e7abd540.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V4/1762652579.898447", - "retrieved_timestamp": "1762652579.898448", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/KRONOS-8B-V4", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7889499860370484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5092470034846742 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38295833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37857380319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file 
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V5/290206b5-0e46-4f92-a2bd-f2c53ef3d147.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V5/290206b5-0e46-4f92-a2bd-f2c53ef3d147.json deleted file mode 100644 index 81073668ef261d162c9c2911403647c2ad2922b6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V5/290206b5-0e46-4f92-a2bd-f2c53ef3d147.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V5/1762652579.8986921", - "retrieved_timestamp": "1762652579.898693", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/KRONOS-8B-V5", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5405058577906621 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088651598969166 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2688821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40546875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37591422872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V6/78813c35-3eaa-4ae6-9099-bf79efb8b0df.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V6/78813c35-3eaa-4ae6-9099-bf79efb8b0df.json deleted file mode 100644 index 78f19fc8088503f793e1c1606b16f9d465414d61..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V6/78813c35-3eaa-4ae6-9099-bf79efb8b0df.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V6/1762652579.898935", - "retrieved_timestamp": "1762652579.898936", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/KRONOS-8B-V6", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7022467054083166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033606149499412 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41210416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3501496010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V7/1358fee5-3874-4997-b1f0-6e93c6c5e9c0.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V7/1358fee5-3874-4997-b1f0-6e93c6c5e9c0.json deleted file mode 100644 index aa2d8d0aa6414a20bffa3e2403b7c4f0c96b964a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V7/1358fee5-3874-4997-b1f0-6e93c6c5e9c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V7/1762652579.899169", - "retrieved_timestamp": "1762652579.8991702", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/KRONOS-8B-V7", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.3529102780622083 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4526219443939161 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36711458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2696974734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V8/57a4ddc6-0447-4840-94bc-5bb136025aab.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V8/57a4ddc6-0447-4840-94bc-5bb136025aab.json deleted file mode 100644 index 76cf489b05515a37bb3a91642bbc821f327070be..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V8/57a4ddc6-0447-4840-94bc-5bb136025aab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V8/1762652579.899387", - "retrieved_timestamp": "1762652579.8993878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/KRONOS-8B-V8", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7770349339751859 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5094406613555632 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20468277945619334 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37824135638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V9/6fbb6156-196d-4523-900e-35316100d3b9.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V9/6fbb6156-196d-4523-900e-35316100d3b9.json deleted file mode 100644 index 7dd5a10bfae9fb24dd1938e4d4fc2e99b6d17cd1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V9/6fbb6156-196d-4523-900e-35316100d3b9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V9/1762652579.8996658", - "retrieved_timestamp": "1762652579.899667", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/KRONOS-8B-V9", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/KRONOS-8B-V9" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7855778224001206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5099211908307056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1986404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868020833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3751662234042553 - } - } - 
], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_Llama-3.1-8B-Instruct-Zeus/38e620aa-c577-4b14-bebd-e98ebcbe48b2.json b/leaderboard_data/HFOpenLLMv2/T145/T145_Llama-3.1-8B-Instruct-Zeus/38e620aa-c577-4b14-bebd-e98ebcbe48b2.json deleted file mode 100644 index 9f324330ad844ba0d9cdfc63c074ad72f71c877f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_Llama-3.1-8B-Instruct-Zeus/38e620aa-c577-4b14-bebd-e98ebcbe48b2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_Llama-3.1-8B-Instruct-Zeus/1762652579.899903", - "retrieved_timestamp": "1762652579.899904", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/Llama-3.1-8B-Instruct-Zeus", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/Llama-3.1-8B-Instruct-Zeus" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7941207108250552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173982439996302 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39762499999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38929521276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_Meta-Llama-3.1-8B-Instruct-TIES/15b92d44-3d68-4c6a-bddd-5676ebda2e10.json b/leaderboard_data/HFOpenLLMv2/T145/T145_Meta-Llama-3.1-8B-Instruct-TIES/15b92d44-3d68-4c6a-bddd-5676ebda2e10.json deleted file mode 100644 index ac8d9e822f6a88ef76492f5f776b566189d47fd4..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/T145/T145_Meta-Llama-3.1-8B-Instruct-TIES/15b92d44-3d68-4c6a-bddd-5676ebda2e10.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_Meta-Llama-3.1-8B-Instruct-TIES/1762652579.900369", - "retrieved_timestamp": "1762652579.900369", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/Meta-Llama-3.1-8B-Instruct-TIES", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/Meta-Llama-3.1-8B-Instruct-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5423542866261519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5070111385564763 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20996978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37799202127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V10/464bae3d-bd06-4264-a939-59ab8e562ca6.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V10/464bae3d-bd06-4264-a939-59ab8e562ca6.json deleted file mode 100644 index ce6cae1100c1a1dffcc4f50620b46225a0fb30f7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V10/464bae3d-bd06-4264-a939-59ab8e562ca6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V10/1762652579.900583", - "retrieved_timestamp": "1762652579.900584", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, 
- "model_info": { - "name": "T145/ZEUS-8B-V10", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V10" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7706651684197928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5269758270442659 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38978124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.390375664893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V11/a6eedf29-9ec8-4b03-a8f5-c9c4e2bda688.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V11/a6eedf29-9ec8-4b03-a8f5-c9c4e2bda688.json deleted file mode 100644 index d416e9837eb5ceda8fd4be3505fe498f70e40f24..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V11/a6eedf29-9ec8-4b03-a8f5-c9c4e2bda688.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V11/1762652579.900793", - "retrieved_timestamp": "1762652579.900793", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V11", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V11" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8099575792231279 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5161982586505715 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38066666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38838098404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V12/1ab70352-9bda-47c8-8bdf-90860934cfc7.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V12/1ab70352-9bda-47c8-8bdf-90860934cfc7.json deleted file mode 100644 index e4c5395006a6c8f57042e9728e8d22c0cf16c176..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V12/1ab70352-9bda-47c8-8bdf-90860934cfc7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V12/1762652579.901004", - "retrieved_timestamp": "1762652579.901004", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V12", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V12" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.781556270695089 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5253912026310238 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38584375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3912067819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V13-abliterated/7c39d06a-dafe-40a7-b5a1-dca14dcadff2.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V13-abliterated/7c39d06a-dafe-40a7-b5a1-dca14dcadff2.json deleted file mode 100644 index 33c8066c7452450d8e697b8326ba1e94b32f4bc8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V13-abliterated/7c39d06a-dafe-40a7-b5a1-dca14dcadff2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V13-abliterated/1762652579.901429", - "retrieved_timestamp": "1762652579.9014301", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V13-abliterated", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V13-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7877509452696623 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5197597316957202 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17900302114803626 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38721742021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V13/10823e50-9478-4a8a-83cf-5169a0bc1f1f.json 
b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V13/10823e50-9478-4a8a-83cf-5169a0bc1f1f.json deleted file mode 100644 index 524a1eccf3c326435b2d2546f2deac4e82da83a5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V13/10823e50-9478-4a8a-83cf-5169a0bc1f1f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V13/1762652579.90121", - "retrieved_timestamp": "1762652579.9012108", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V13", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V13" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7904238531540756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5277128851736589 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21374622356495468 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38447916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39112367021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V14/2b0eb3f5-d35e-41ea-ba69-18c0b8a3e1e1.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V14/2b0eb3f5-d35e-41ea-ba69-18c0b8a3e1e1.json deleted file mode 100644 index 030e9224dc44ea1921c719f0839b801191147b3a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V14/2b0eb3f5-d35e-41ea-ba69-18c0b8a3e1e1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V14/1762652579.901653", - "retrieved_timestamp": "1762652579.901653", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V14", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V14" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.770939994769434 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5274593322517976 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3844479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3913730053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V15/3e1be4f3-478f-4061-9856-f1beb0a749de.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V15/3e1be4f3-478f-4061-9856-f1beb0a749de.json deleted file mode 100644 index 08d4edf5149e8bb30013ed6dd8edb26faae84bec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V15/3e1be4f3-478f-4061-9856-f1beb0a749de.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V15/1762652579.901858", - "retrieved_timestamp": "1762652579.901859", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V15", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V15" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.701272623306161 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5537552380544757 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40199999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40591755319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V16/7beef3ca-6423-4a81-836d-0e4cdc4af973.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V16/7beef3ca-6423-4a81-836d-0e4cdc4af973.json deleted file mode 100644 index e7b1e0840fc5b0d16705805f072b9bc3a290c8d0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V16/7beef3ca-6423-4a81-836d-0e4cdc4af973.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V16/1762652579.9020631", - "retrieved_timestamp": "1762652579.902064", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V16", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V16" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7925471083392066 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5265817990313368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", 
- "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3950833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39261968085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated-V2/3344d19c-c79b-48b3-be5b-f5f27d6920ce.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated-V2/3344d19c-c79b-48b3-be5b-f5f27d6920ce.json deleted file mode 100644 index e9165158f6326d4bf8a3c8f7a0402ca4bbdbfd71..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated-V2/3344d19c-c79b-48b3-be5b-f5f27d6920ce.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17-abliterated-V2/1762652579.902674", - "retrieved_timestamp": "1762652579.902674", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V17-abliterated-V2", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V17-abliterated-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6532123654126606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49280119619174295 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3407291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34017619680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated-V4/bf9c0bfa-98e5-45b2-8819-0911af81d78f.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated-V4/bf9c0bfa-98e5-45b2-8819-0911af81d78f.json deleted file mode 100644 index 1d2a32fdd930a438f278bf062674908c101ed23b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated-V4/bf9c0bfa-98e5-45b2-8819-0911af81d78f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17-abliterated-V4/1762652579.902891", - "retrieved_timestamp": "1762652579.902891", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V17-abliterated-V4", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V17-abliterated-V4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7228298691915229 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5169216944225185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37741023936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated/35f89ab6-c6c9-41cd-9296-af4921490c3f.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated/35f89ab6-c6c9-41cd-9296-af4921490c3f.json deleted file mode 100644 index 670e8d3924c325242a637f50d413a804b98789a0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated/35f89ab6-c6c9-41cd-9296-af4921490c3f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/T145_ZEUS-8B-V17-abliterated/1762652579.902467", - "retrieved_timestamp": "1762652579.9024682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V17-abliterated", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V17-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7576009432749549 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520041374505222 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42692708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36220079787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.594 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17/0368a3ba-e461-45d1-a037-3b9160a8efbb.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17/0368a3ba-e461-45d1-a037-3b9160a8efbb.json deleted file mode 100644 index 09a95a6d3225888fdb0382ae804ca10e30c729f1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17/0368a3ba-e461-45d1-a037-3b9160a8efbb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17/1762652579.902262", - "retrieved_timestamp": "1762652579.902263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V17", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V17" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { 
- "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7940708431406447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.525086643033107 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40162499999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39345079787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V18/e5d250e7-8d0a-48b5-aaad-3d1da02eab00.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V18/e5d250e7-8d0a-48b5-aaad-3d1da02eab00.json deleted file mode 100644 index e90f76e73bba087be80034c57202eff861fbf70e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V18/e5d250e7-8d0a-48b5-aaad-3d1da02eab00.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V18/1762652579.903114", - "retrieved_timestamp": "1762652579.903115", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V18", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V18" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7834046995305788 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5269802862530547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40429166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39419880319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V19/0392cccb-0a1c-486e-876a-1404f14a1080.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V19/0392cccb-0a1c-486e-876a-1404f14a1080.json deleted file mode 100644 index b2d34b30a6eb11d4986fb6b9f53131cf9ef04c7a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V19/0392cccb-0a1c-486e-876a-1404f14a1080.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V19/1762652579.903361", - "retrieved_timestamp": "1762652579.903362", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V19", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V19" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7882507302845339 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5276233222408697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40429166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3933676861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2-ORPO/588b0fce-37cd-41f1-8eaa-50383cdc0f00.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2-ORPO/588b0fce-37cd-41f1-8eaa-50383cdc0f00.json deleted file mode 100644 index fe74092e2d285bae9102d5939ab58b19d095a34b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2-ORPO/588b0fce-37cd-41f1-8eaa-50383cdc0f00.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2-ORPO/1762652579.903775", - "retrieved_timestamp": "1762652579.903776", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V2-ORPO", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V2-ORPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7186830941900824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5075246906772 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39349999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3677692819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2-abliterated/926fb6ed-0750-4d04-8e3c-da470e236db2.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2-abliterated/926fb6ed-0750-4d04-8e3c-da470e236db2.json deleted file mode 100644 index 6a709041def1dfd0aa3ca6341b14c60d74c9e950..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2-abliterated/926fb6ed-0750-4d04-8e3c-da470e236db2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2-abliterated/1762652579.9039848", - "retrieved_timestamp": "1762652579.903986", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V2-abliterated", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V2-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7895495064207414 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5128868622210663 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38248005319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2/e64503c5-d9ce-4544-8caf-0fec97a2b592.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2/e64503c5-d9ce-4544-8caf-0fec97a2b592.json deleted file mode 100644 index 8bb5d48747fbf138753245d96c57e1d69006d670..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2/e64503c5-d9ce-4544-8caf-0fec97a2b592.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2/1762652579.9035678", - "retrieved_timestamp": "1762652579.903569", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"T145/ZEUS-8B-V2", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8029384255996312 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5194405455747161 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21601208459214502 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3896276595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V20/0ba8bca5-3a61-499a-8e2d-ca84f52ef654.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V20/0ba8bca5-3a61-499a-8e2d-ca84f52ef654.json deleted file mode 100644 index 7e5f033d32324ad1c35eacc7ae6de826e0c9f63e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V20/0ba8bca5-3a61-499a-8e2d-ca84f52ef654.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V20/1762652579.904202", - "retrieved_timestamp": "1762652579.904203", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V20", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V20" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7955945779420825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244005058415827 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40432291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3929521276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V21/380a44ec-387a-4f34-92c2-18fc7a8d5ce0.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V21/380a44ec-387a-4f34-92c2-18fc7a8d5ce0.json deleted file mode 100644 index ea4c316eaf08328221985fddb83d3d8077b0755c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V21/380a44ec-387a-4f34-92c2-18fc7a8d5ce0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V21/1762652579.904516", - "retrieved_timestamp": "1762652579.904516", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V21", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V21" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3785145635801894 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33975753940458464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1593655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.32615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17137632978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V22/3f44a1c0-b70a-4712-a0c1-bdf3318b270c.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V22/3f44a1c0-b70a-4712-a0c1-bdf3318b270c.json deleted file mode 100644 index 665c3135dcb724763e107d50e53ff7260d8d7d30..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V22/3f44a1c0-b70a-4712-a0c1-bdf3318b270c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V22/1762652579.9047282", - "retrieved_timestamp": "1762652579.9047291", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V22", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V22" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7995163942782927 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244915522507715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22280966767371602 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3989583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3937832446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V23/f83b7584-0e52-4658-ae15-f295064b9111.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V23/f83b7584-0e52-4658-ae15-f295064b9111.json deleted file mode 100644 
index 568605e49c918c9c27ca10f55e1f8e2c03a9a867..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V23/f83b7584-0e52-4658-ae15-f295064b9111.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V23/1762652579.904932", - "retrieved_timestamp": "1762652579.9049332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V23", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V23" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7621222799948582 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519500470668349 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3666057180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V24/51368b21-1b48-4c07-9b09-8cae0786200b.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V24/51368b21-1b48-4c07-9b09-8cae0786200b.json deleted file mode 100644 index 7e3d0d6adfb55379312c12e96e9408104c6aabe4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V24/51368b21-1b48-4c07-9b09-8cae0786200b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V24/1762652579.905136", - "retrieved_timestamp": "1762652579.9051368", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V24", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V24" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5999813827311533 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4777962576721959 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3729166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32845744680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V25/52b41117-c308-4e8c-9c61-ce8e4faf778f.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V25/52b41117-c308-4e8c-9c61-ce8e4faf778f.json deleted file mode 100644 index 9e0cbcdb6cc5b2f5e901666ebfdd33f75c0c148b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V25/52b41117-c308-4e8c-9c61-ce8e4faf778f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V25/1762652579.905337", - "retrieved_timestamp": "1762652579.905338", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V25", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V25" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33202790817253774 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4546907005207668 - } 
- }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3488229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2884807180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V26/8ae81cea-b179-4025-916a-9bc73755de82.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V26/8ae81cea-b179-4025-916a-9bc73755de82.json deleted file mode 100644 index 45209944275a57403dd6db85a6cd323e581b48e9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V26/8ae81cea-b179-4025-916a-9bc73755de82.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V26/1762652579.905539", - "retrieved_timestamp": "1762652579.905539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V26", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V26" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6707979272774018 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5231548583920674 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40162499999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39070811170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V27/bf31323b-bfb5-464a-b343-0605dafb5a60.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V27/bf31323b-bfb5-464a-b343-0605dafb5a60.json deleted file mode 100644 index a45533efd035d71fb193158850cea9935b88bd63..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V27/bf31323b-bfb5-464a-b343-0605dafb5a60.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V27/1762652579.9057322", - "retrieved_timestamp": "1762652579.905733", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V27", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V27" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.654361538495636 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.52303129292911 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39768749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3902094414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V28/e31561ff-779a-4ebe-b6fe-686b2895c53b.json 
b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V28/e31561ff-779a-4ebe-b6fe-686b2895c53b.json deleted file mode 100644 index da0056098b66ebc90ec3e3a43474bd753c57b7a9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V28/e31561ff-779a-4ebe-b6fe-686b2895c53b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V28/1762652579.905931", - "retrieved_timestamp": "1762652579.905931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V28", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V28" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.635252241829457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5254256199968339 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38962499999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3902094414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V29/c383684a-2f70-46e9-ab55-4d68903613b3.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V29/c383684a-2f70-46e9-ab55-4d68903613b3.json deleted file mode 100644 index 485a9b6126d0aef31f727a6988d0e675504c4c3f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V29/c383684a-2f70-46e9-ab55-4d68903613b3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V29/1762652579.906123", - "retrieved_timestamp": "1762652579.906123", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V29", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V29" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7417640748768822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5253330901112457 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4002604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3920378989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2L1/015f91ef-9318-44d6-acb2-17628000c273.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2L1/015f91ef-9318-44d6-acb2-17628000c273.json deleted file mode 100644 index 1d938f1e6073c3de0c79bc10c47363f23f13fd69..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2L1/015f91ef-9318-44d6-acb2-17628000c273.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2L1/1762652579.906316", - "retrieved_timestamp": "1762652579.906317", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V2L1", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V2L1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3191886416929303 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5013485375260267 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38819791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36377992021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2L2/8e7be46e-af57-4e88-9df5-3161110dfa66.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2L2/8e7be46e-af57-4e88-9df5-3161110dfa66.json deleted file mode 100644 index 9667f8cb5d192574cb8287e57566c90e95ca8ec8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2L2/8e7be46e-af57-4e88-9df5-3161110dfa66.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2L2/1762652579.9065118", - "retrieved_timestamp": "1762652579.906513", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V2L2", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V2L2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8020640788662969 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5202843665402132 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39746875000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38838098404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V3/6b8fca40-f44b-45a0-bd5b-04b2fa2067a2.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V3/6b8fca40-f44b-45a0-bd5b-04b2fa2067a2.json deleted file mode 100644 index 51791f21c5e9ae76d848946c6283a5f63647aa29..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V3/6b8fca40-f44b-45a0-bd5b-04b2fa2067a2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V3/1762652579.906709", - "retrieved_timestamp": "1762652579.90671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V3", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7886751596874072 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5265064133535374 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16767371601208458 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4016875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38040226063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V30/839ff423-8c5c-4fab-aecf-b535ee06af36.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V30/839ff423-8c5c-4fab-aecf-b535ee06af36.json deleted file mode 100644 index a7157686fc9123efb990539f84c2acfe1eb6a23a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V30/839ff423-8c5c-4fab-aecf-b535ee06af36.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V30/1762652579.907134", - "retrieved_timestamp": "1762652579.907138", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V30", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V30" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7435626360279614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5243248855841048 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15861027190332327 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4029270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3943650265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V4/9330c290-ee47-4a7d-9b8f-62903402e0e3.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V4/9330c290-ee47-4a7d-9b8f-62903402e0e3.json deleted file mode 100644 index fe4ad996b4c86b7f10f13da4c7d46d9c1c856e54..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V4/9330c290-ee47-4a7d-9b8f-62903402e0e3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V4/1762652579.9075332", - "retrieved_timestamp": "1762652579.907535", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V4", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7807317916461656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5245974297200655 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4028958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37882313829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V6/09670c05-9463-479f-89e3-5029fd5d7ee7.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V6/09670c05-9463-479f-89e3-5029fd5d7ee7.json deleted file mode 100644 index 64a38cbdcc57b26c6d44eb590289afb5e45a66be..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V6/09670c05-9463-479f-89e3-5029fd5d7ee7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V6/1762652579.9077919", - "retrieved_timestamp": "1762652579.9077928", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V6", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7837792612490415 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5239561762634447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4068020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37591422872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V7/c6a9173a-bacc-40bd-9572-239f9901e065.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V7/c6a9173a-bacc-40bd-9572-239f9901e065.json deleted file mode 100644 index bdefea2a3ee0a00891726faca546046848d93fcd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V7/c6a9173a-bacc-40bd-9572-239f9901e065.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V7/1762652579.908076", - "retrieved_timestamp": "1762652579.908077", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V7", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7786085364610345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5070394117180643 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41616666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3812333776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V8/c0035841-a312-493e-9c44-a75133e894d1.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V8/c0035841-a312-493e-9c44-a75133e894d1.json deleted file mode 100644 index 64a5ee0870ae37e20ed283ad0f2027e66704997e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V8/c0035841-a312-493e-9c44-a75133e894d1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V8/1762652579.908298", - "retrieved_timestamp": "1762652579.908299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V8", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7913979352562313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5064510419864701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.421375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37608045212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ 
No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V9/f5876dc1-b769-431f-84fe-365d2457902e.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V9/f5876dc1-b769-431f-84fe-365d2457902e.json deleted file mode 100644 index d3c8d112e9cc32fb75d0984c43ede71e8083dd6d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V9/f5876dc1-b769-431f-84fe-365d2457902e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V9/1762652579.908509", - "retrieved_timestamp": "1762652579.90851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/ZEUS-8B-V9", - "developer": "T145", - "inference_platform": "unknown", - "id": "T145/ZEUS-8B-V9" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5551436854213487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5207256346477752 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21374622356495468 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3949270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39012632978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-1m-hf/077f7956-8c9b-47ef-8c4d-40455bbb0027.json b/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-1m-hf/077f7956-8c9b-47ef-8c4d-40455bbb0027.json deleted file mode 100644 index 7c926c9d31b409d8f9c4cd9b1d7f5e9d0e68e473..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-1m-hf/077f7956-8c9b-47ef-8c4d-40455bbb0027.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat-1m-hf/1762652579.9096901", - "retrieved_timestamp": "1762652579.9096909", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "THUDM/glm-4-9b-chat-1m-hf", - "developer": "THUDM", - "inference_platform": "unknown", - "id": "THUDM/glm-4-9b-chat-1m-hf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5341106043076814 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3900953106836365 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36888541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18143284574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GlmForCausalLM", - "params_billions": 9.484 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-1m/f0c306f0-683e-4582-81b7-f0a2c372060f.json b/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-1m/f0c306f0-683e-4582-81b7-f0a2c372060f.json deleted file mode 100644 index d5f1bcecce65d6de75520f65800363974accd6ec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-1m/f0c306f0-683e-4582-81b7-f0a2c372060f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat-1m/1762652579.909478", - "retrieved_timestamp": "1762652579.909479", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "THUDM/glm-4-9b-chat-1m", - "developer": "THUDM", - "inference_platform": "unknown", - "id": "THUDM/glm-4-9b-chat-1m" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41800578218330303 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31632313829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "ChatGLMModel", - "params_billions": 9.484 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-hf/0af9353e-10d5-42e3-8bc9-4c736720ff30.json b/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-hf/0af9353e-10d5-42e3-8bc9-4c736720ff30.json deleted file mode 100644 index 6848dd5fddc1ba2903a9bf4329e8444074cffc0e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-hf/0af9353e-10d5-42e3-8bc9-4c736720ff30.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat-hf/1762652579.909895", - "retrieved_timestamp": "1762652579.909896", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "THUDM/glm-4-9b-chat-hf", - "developer": "THUDM", - "inference_platform": "unknown", - "id": "THUDM/glm-4-9b-chat-hf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6513140688927601 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4432308604245425 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35930208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27742686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GlmForCausalLM", - "params_billions": 9.4 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat/e7c5d8ef-d480-4ab9-b698-409e5ea76cf8.json b/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat/e7c5d8ef-d480-4ab9-b698-409e5ea76cf8.json deleted file mode 100644 index 6c58e016b77f523428769933412c0f7b86397577..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat/e7c5d8ef-d480-4ab9-b698-409e5ea76cf8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat/1762652579.909267", - "retrieved_timestamp": "1762652579.909267", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "THUDM/glm-4-9b-chat", - "developer": "THUDM", - "inference_platform": "unknown", - "id": "THUDM/glm-4-9b-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47363884291035735 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3994270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.316655585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "ChatGLMModelM", - "params_billions": 9.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b/bd038a6c-1241-401d-962d-e033434ba735.json b/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b/bd038a6c-1241-401d-962d-e033434ba735.json deleted file mode 100644 index 787462be2eccdec56f6db35d13646d6c40d80094..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b/bd038a6c-1241-401d-962d-e033434ba735.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b/1762652579.9090161", - "retrieved_timestamp": "1762652579.9090161", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "THUDM/glm-4-9b", - "developer": "THUDM", - "inference_platform": "unknown", - "id": "THUDM/glm-4-9b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1426082793654171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5528368141665274 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4385833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144780585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "ChatGLMModelM", - "params_billions": 9.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TIGER-Lab/TIGER-Lab_AceCodeRM-7B/eb1d6ce5-3b0c-477d-9ca6-2f3ff8bc4e30.json b/leaderboard_data/HFOpenLLMv2/TIGER-Lab/TIGER-Lab_AceCodeRM-7B/eb1d6ce5-3b0c-477d-9ca6-2f3ff8bc4e30.json deleted file mode 100644 index cfa3bb51544c6543e5addf90db1ff38ea9b34976..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/TIGER-Lab/TIGER-Lab_AceCodeRM-7B/eb1d6ce5-3b0c-477d-9ca6-2f3ff8bc4e30.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/TIGER-Lab_AceCodeRM-7B/1762652579.9101062", - "retrieved_timestamp": "1762652579.910107", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TIGER-Lab/AceCodeRM-7B", - "developer": "TIGER-Lab", - "inference_platform": "unknown", - "id": "TIGER-Lab/AceCodeRM-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5854931581536988 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4773230085351336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3466767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41920833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3361037234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalRM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TIGER-Lab/TIGER-Lab_MAmmoTH2-7B-Plus/93503cc0-80aa-44b5-9155-c81cd44a9ac9.json b/leaderboard_data/HFOpenLLMv2/TIGER-Lab/TIGER-Lab_MAmmoTH2-7B-Plus/93503cc0-80aa-44b5-9155-c81cd44a9ac9.json deleted file mode 100644 index 4bb4ad4adb9c2a3e50bef1d991843d28e4aeef4d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/TIGER-Lab/TIGER-Lab_MAmmoTH2-7B-Plus/93503cc0-80aa-44b5-9155-c81cd44a9ac9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TIGER-Lab_MAmmoTH2-7B-Plus/1762652579.9110248", - "retrieved_timestamp": "1762652579.911026", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TIGER-Lab/MAmmoTH2-7B-Plus", - "developer": "TIGER-Lab", - "inference_platform": "unknown", - "id": "TIGER-Lab/MAmmoTH2-7B-Plus" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5574664113441224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42346949888019064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41235416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30169547872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TeeZee/TeeZee_DoubleBagel-57B-v1.0/1315f2ad-2e39-4cab-b09a-c74d0779f895.json b/leaderboard_data/HFOpenLLMv2/TeeZee/TeeZee_DoubleBagel-57B-v1.0/1315f2ad-2e39-4cab-b09a-c74d0779f895.json deleted file mode 100644 index 676d381bb1f34517ee96ffc7b73f29f1925a0f6a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/TeeZee/TeeZee_DoubleBagel-57B-v1.0/1315f2ad-2e39-4cab-b09a-c74d0779f895.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TeeZee_DoubleBagel-57B-v1.0/1762652579.9121659", - "retrieved_timestamp": "1762652579.9121659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TeeZee/DoubleBagel-57B-v1.0", - "developer": "TeeZee", - "inference_platform": "unknown", - "id": "TeeZee/DoubleBagel-57B-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23363342597640924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.325078559362514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43148958333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14777260638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 56.703 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Telugu-LLM-Labs/Telugu-LLM-Labs_Indic-gemma-2b-finetuned-sft-Navarasa-2.0/ec8a8e25-f985-40a8-80ff-0c7d7595029d.json b/leaderboard_data/HFOpenLLMv2/Telugu-LLM-Labs/Telugu-LLM-Labs_Indic-gemma-2b-finetuned-sft-Navarasa-2.0/ec8a8e25-f985-40a8-80ff-0c7d7595029d.json deleted file mode 100644 index c073d9459410b753b42c473e3469cc08dbc0d446..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Telugu-LLM-Labs/Telugu-LLM-Labs_Indic-gemma-2b-finetuned-sft-Navarasa-2.0/ec8a8e25-f985-40a8-80ff-0c7d7595029d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Telugu-LLM-Labs_Indic-gemma-2b-finetuned-sft-Navarasa-2.0/1762652579.912417", - "retrieved_timestamp": "1762652579.912417", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0", - "developer": "Telugu-LLM-Labs", - "inference_platform": "unknown", - "id": "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21030310686755588 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3240881373468133 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3899375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12790890957446807 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Telugu-LLM-Labs/Telugu-LLM-Labs_Indic-gemma-7b-finetuned-sft-Navarasa-2.0/89d117f3-7a67-4e30-82b2-b42efaf44024.json b/leaderboard_data/HFOpenLLMv2/Telugu-LLM-Labs/Telugu-LLM-Labs_Indic-gemma-7b-finetuned-sft-Navarasa-2.0/89d117f3-7a67-4e30-82b2-b42efaf44024.json deleted file mode 100644 index c44ef73786c8b5a264c86c32f9a18f64f7373925..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Telugu-LLM-Labs/Telugu-LLM-Labs_Indic-gemma-7b-finetuned-sft-Navarasa-2.0/89d117f3-7a67-4e30-82b2-b42efaf44024.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Telugu-LLM-Labs_Indic-gemma-7b-finetuned-sft-Navarasa-2.0/1762652579.912673", - "retrieved_timestamp": "1762652579.912673", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0", - "developer": "Telugu-LLM-Labs", - "inference_platform": "unknown", - "id": "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32368449048524583 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40229948924733394 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40832291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23503989361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TencentARC/TencentARC_LLaMA-Pro-8B-Instruct/98ea850e-7019-4728-a558-8b1819ec47c2.json b/leaderboard_data/HFOpenLLMv2/TencentARC/TencentARC_LLaMA-Pro-8B-Instruct/98ea850e-7019-4728-a558-8b1819ec47c2.json deleted file mode 100644 index 88ca70a09f005b7a0653fef60c474f355133e3ad..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/TencentARC/TencentARC_LLaMA-Pro-8B-Instruct/98ea850e-7019-4728-a558-8b1819ec47c2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TencentARC_LLaMA-Pro-8B-Instruct/1762652579.9131231", - "retrieved_timestamp": "1762652579.913124", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TencentARC/LLaMA-Pro-8B-Instruct", - "developer": "TencentARC", - "inference_platform": "unknown", - "id": "TencentARC/LLaMA-Pro-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486063644463357 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4224205282459997 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41902083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19456449468085107 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.357 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Cydonia-22B-v1.2/4a3e8df4-8e21-4c7c-aec8-afe353831c3d.json 
b/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Cydonia-22B-v1.2/4a3e8df4-8e21-4c7c-aec8-afe353831c3d.json deleted file mode 100644 index 315c7016fee87f5b5e1dc28eb9fc6e4db83581b3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Cydonia-22B-v1.2/4a3e8df4-8e21-4c7c-aec8-afe353831c3d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheDrummer_Cydonia-22B-v1.2/1762652579.9138188", - "retrieved_timestamp": "1762652579.9138198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheDrummer/Cydonia-22B-v1.2", - "developer": "TheDrummer", - "inference_platform": "unknown", - "id": "TheDrummer/Cydonia-22B-v1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5635114828654637 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.580856074392761 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40217708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4140625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Ministrations-8B-v1/21d5973e-d827-4bd6-b050-346da350a0aa.json b/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Ministrations-8B-v1/21d5973e-d827-4bd6-b050-346da350a0aa.json deleted file mode 100644 index 6de98210c9ac79183ecc4db810e6603af9c9b4eb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Ministrations-8B-v1/21d5973e-d827-4bd6-b050-346da350a0aa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheDrummer_Ministrations-8B-v1/1762652579.9148722", - "retrieved_timestamp": "1762652579.9148731", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheDrummer/Ministrations-8B-v1", - "developer": "TheDrummer", - "inference_platform": "unknown", - "id": "TheDrummer/Ministrations-8B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28219346888478125 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48766312602251366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18429003021148035 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44490625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36436170212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.02 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Rocinante-12B-v1/f21e98c1-5535-4cb4-a9f0-541e49aff795.json b/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Rocinante-12B-v1/f21e98c1-5535-4cb4-a9f0-541e49aff795.json deleted file mode 100644 index 2da455ecddaabe46e6a419f591218f11c230ec26..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Rocinante-12B-v1/f21e98c1-5535-4cb4-a9f0-541e49aff795.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheDrummer_Rocinante-12B-v1/1762652579.915099", - "retrieved_timestamp": "1762652579.9150999", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheDrummer/Rocinante-12B-v1", - "developer": "TheDrummer", - "inference_platform": "unknown", - "id": "TheDrummer/Rocinante-12B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6076499244227538 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5065452085797449 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40171874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34773936170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Daughter-of-Rhodia-12B/0f1c48a7-2a20-40c8-88e8-bdfdc3cdad40.json b/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Daughter-of-Rhodia-12B/0f1c48a7-2a20-40c8-88e8-bdfdc3cdad40.json deleted file mode 100644 index adb02b5d5dd7f5d113acf7e3e1ca1973ba600d61..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Daughter-of-Rhodia-12B/0f1c48a7-2a20-40c8-88e8-bdfdc3cdad40.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheDrunkenSnail_Daughter-of-Rhodia-12B/1762652579.91594", - "retrieved_timestamp": "1762652579.9159412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheDrunkenSnail/Daughter-of-Rhodia-12B", - "developer": "TheDrunkenSnail", - "inference_platform": "unknown", - "id": "TheDrunkenSnail/Daughter-of-Rhodia-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6903815210308648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5179174184876773 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43477083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3641123670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Mother-of-Rhodia-12B/2178eb24-2558-44db-aff1-7903c2e0f657.json b/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Mother-of-Rhodia-12B/2178eb24-2558-44db-aff1-7903c2e0f657.json deleted file mode 100644 index 5b0e8d22ee2e12ce6533a805ffa77a871eae8e0a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Mother-of-Rhodia-12B/2178eb24-2558-44db-aff1-7903c2e0f657.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheDrunkenSnail_Mother-of-Rhodia-12B/1762652579.9161909", - "retrieved_timestamp": "1762652579.9161909", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheDrunkenSnail/Mother-of-Rhodia-12B", - "developer": "TheDrunkenSnail", - "inference_platform": "unknown", - "id": "TheDrunkenSnail/Mother-of-Rhodia-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6504895898438365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49479138664574934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { 
- "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41241666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35513630319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Son-of-Rhodia/22c87268-7e49-42b4-9bbb-16a4b305c595.json b/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Son-of-Rhodia/22c87268-7e49-42b4-9bbb-16a4b305c595.json deleted file mode 100644 index dc8c8c2a15412781b809cb0330ff6f9759f25c81..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Son-of-Rhodia/22c87268-7e49-42b4-9bbb-16a4b305c595.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheDrunkenSnail_Son-of-Rhodia/1762652579.916397", - "retrieved_timestamp": "1762652579.916397", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheDrunkenSnail/Son-of-Rhodia", - "developer": "TheDrunkenSnail", - "inference_platform": "unknown", - "id": "TheDrunkenSnail/Son-of-Rhodia" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7046447869430887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097327647725524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4202916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3607878989361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - 
"params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TheHierophant/TheHierophant_Underground-Cognitive-V0.3-test/872cc338-765c-4291-8b50-77b4bce719fd.json b/leaderboard_data/HFOpenLLMv2/TheHierophant/TheHierophant_Underground-Cognitive-V0.3-test/872cc338-765c-4291-8b50-77b4bce719fd.json deleted file mode 100644 index e25147ffaf19963e4506dce1d8ec28f1dce33a9a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/TheHierophant/TheHierophant_Underground-Cognitive-V0.3-test/872cc338-765c-4291-8b50-77b4bce719fd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheHierophant_Underground-Cognitive-V0.3-test/1762652579.916598", - "retrieved_timestamp": "1762652579.916598", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheHierophant/Underground-Cognitive-V0.3-test", - "developer": "TheHierophant", - "inference_platform": "unknown", - "id": "TheHierophant/Underground-Cognitive-V0.3-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4808297539417634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5290131900998047 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43511458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.331781914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TheTsar1209/TheTsar1209_nemo-carpmuscle-v0.1/8e834483-df6f-4d58-8257-f0cd1d8e3aa1.json b/leaderboard_data/HFOpenLLMv2/TheTsar1209/TheTsar1209_nemo-carpmuscle-v0.1/8e834483-df6f-4d58-8257-f0cd1d8e3aa1.json deleted file mode 100644 index 681692c2e177c5dfbee6184aaa896332dcacf0dd..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/TheTsar1209/TheTsar1209_nemo-carpmuscle-v0.1/8e834483-df6f-4d58-8257-f0cd1d8e3aa1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheTsar1209_nemo-carpmuscle-v0.1/1762652579.9168499", - "retrieved_timestamp": "1762652579.916851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheTsar1209/nemo-carpmuscle-v0.1", - "developer": "TheTsar1209", - "inference_platform": "unknown", - "id": "TheTsar1209/nemo-carpmuscle-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2275639746982451 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5083529697101391 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4135 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3405917553191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Tijmen2/Tijmen2_cosmosage-v3/f1eed2d5-89ca-4757-a5f9-9a90e811f075.json b/leaderboard_data/HFOpenLLMv2/Tijmen2/Tijmen2_cosmosage-v3/f1eed2d5-89ca-4757-a5f9-9a90e811f075.json deleted file mode 100644 index efccccc2766d126cd5375577b59b118e66758d47..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Tijmen2/Tijmen2_cosmosage-v3/f1eed2d5-89ca-4757-a5f9-9a90e811f075.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Tijmen2_cosmosage-v3/1762652579.918411", - "retrieved_timestamp": "1762652579.918412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "Tijmen2/cosmosage-v3", - "developer": "Tijmen2", - "inference_platform": "unknown", - "id": "Tijmen2/cosmosage-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44823180272787316 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4550637900339029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4198854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24858710106382978 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.1/818cb0a4-7458-4cee-aca8-7cc72db341f8.json b/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.1/818cb0a4-7458-4cee-aca8-7cc72db341f8.json deleted file mode 100644 index 4acc4eebc6f3d909a5f49346c3ac885a6a2ab0bd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.1/818cb0a4-7458-4cee-aca8-7cc72db341f8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v0.1/1762652579.918663", - "retrieved_timestamp": "1762652579.918664", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TinyLlama/TinyLlama-1.1B-Chat-v0.1", - "developer": "TinyLlama", - "inference_platform": "unknown", - "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1478543597654224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30835294748680114 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22902684563758388 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35923958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10979055851063829 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.5/96454d40-4535-4439-87be-0ea7b55cd88a.json b/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.5/96454d40-4535-4439-87be-0ea7b55cd88a.json deleted file mode 100644 index 4c4ebe171cfc418f511eeb8e54ed2617863d64f7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.5/96454d40-4535-4439-87be-0ea7b55cd88a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v0.5/1762652579.918914", - "retrieved_timestamp": "1762652579.918914", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TinyLlama/TinyLlama-1.1B-Chat-v0.5", - "developer": "TinyLlama", - "inference_platform": "unknown", - "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1633665341294432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3105046915935697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36612500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10962433510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.6/be032e7e-39b5-4153-81b9-c29115b231b4.json b/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.6/be032e7e-39b5-4153-81b9-c29115b231b4.json deleted file mode 100644 index 64b9a7ceb9f60def5eb15bd5158434f2d3f1fe02..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.6/be032e7e-39b5-4153-81b9-c29115b231b4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v0.6/1762652579.919127", - "retrieved_timestamp": "1762652579.919127", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", - "developer": "TinyLlama", - "inference_platform": "unknown", - "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15742119797692344 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3066976656166826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34221875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.11486037234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v1.0/0a24d7b1-44eb-4f5b-ae2f-ddee372facd5.json b/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v1.0/0a24d7b1-44eb-4f5b-ae2f-ddee372facd5.json deleted file mode 100644 index a09d007f0a919dfc793b51a731384bcf94933450..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v1.0/0a24d7b1-44eb-4f5b-ae2f-ddee372facd5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v1.0/1762652579.9193401", - "retrieved_timestamp": "1762652579.919341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "developer": "TinyLlama", - "inference_platform": "unknown", - "id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0595763684800773 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3103562867491015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35152083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11012300531914894 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ToastyPigeon/ToastyPigeon_Sto-vo-kor-12B/1c795b39-a382-4315-8b6b-626423b9ccfe.json b/leaderboard_data/HFOpenLLMv2/ToastyPigeon/ToastyPigeon_Sto-vo-kor-12B/1c795b39-a382-4315-8b6b-626423b9ccfe.json deleted file mode 100644 index 
f43508f3697c8a302fc4ebe8ad6a23b24dcc93ee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ToastyPigeon/ToastyPigeon_Sto-vo-kor-12B/1c795b39-a382-4315-8b6b-626423b9ccfe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ToastyPigeon_Sto-vo-kor-12B/1762652579.920128", - "retrieved_timestamp": "1762652579.920129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ToastyPigeon/Sto-vo-kor-12B", - "developer": "ToastyPigeon", - "inference_platform": "unknown", - "id": "ToastyPigeon/Sto-vo-kor-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5501225636865739 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5064617128925814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976063829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Trappu/Trappu_Magnum-Picaro-0.7-v2-12b/77871404-f2e3-46f9-8c48-808fb89442cc.json b/leaderboard_data/HFOpenLLMv2/Trappu/Trappu_Magnum-Picaro-0.7-v2-12b/77871404-f2e3-46f9-8c48-808fb89442cc.json deleted file mode 100644 index 922ffdcaaa6f62318a5c6bb327849e86e1453d4f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Trappu/Trappu_Magnum-Picaro-0.7-v2-12b/77871404-f2e3-46f9-8c48-808fb89442cc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Trappu_Magnum-Picaro-0.7-v2-12b/1762652579.920383", - "retrieved_timestamp": "1762652579.920383", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Trappu/Magnum-Picaro-0.7-v2-12b", - "developer": "Trappu", - "inference_platform": "unknown", - "id": "Trappu/Magnum-Picaro-0.7-v2-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.300278815764394 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5506661918828847 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47271875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35804521276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Trappu/Trappu_Nemo-Picaro-12B/37534f85-e1ae-482b-89d0-480c4bbc50e7.json b/leaderboard_data/HFOpenLLMv2/Trappu/Trappu_Nemo-Picaro-12B/37534f85-e1ae-482b-89d0-480c4bbc50e7.json deleted file mode 100644 index 42fd4f97aab3d08e28e859b799625f081f409b9d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Trappu/Trappu_Nemo-Picaro-12B/37534f85-e1ae-482b-89d0-480c4bbc50e7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Trappu_Nemo-Picaro-12B/1762652579.92064", - "retrieved_timestamp": "1762652579.92064", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Trappu/Nemo-Picaro-12B", - "developer": "Trappu", - "inference_platform": "unknown", - "id": "Trappu/Nemo-Picaro-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2577139766929525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5489586125997546 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47259375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36045545212765956 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Tremontaine/Tremontaine_L3-12B-Lunaris-v1/51e5f1f2-a43a-4ade-9207-1b15d172ba08.json b/leaderboard_data/HFOpenLLMv2/Tremontaine/Tremontaine_L3-12B-Lunaris-v1/51e5f1f2-a43a-4ade-9207-1b15d172ba08.json deleted file mode 100644 index b5c065e2f4fd4ddc123d0372742b38358f358436..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Tremontaine/Tremontaine_L3-12B-Lunaris-v1/51e5f1f2-a43a-4ade-9207-1b15d172ba08.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Tremontaine_L3-12B-Lunaris-v1/1762652579.920848", - "retrieved_timestamp": "1762652579.920848", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Tremontaine/L3-12B-Lunaris-v1", - "developer": "Tremontaine", - "inference_platform": "unknown", - "id": "Tremontaine/L3-12B-Lunaris-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6909311737301471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5230217237244009 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3673645833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3774933510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 11.52 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Annunaki-12b/28f9e91f-b32f-4b8f-ae18-126c7bbe6e7d.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Annunaki-12b/28f9e91f-b32f-4b8f-ae18-126c7bbe6e7d.json deleted file mode 100644 index 46e556f25034da178890f366fa13853c777339ef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Annunaki-12b/28f9e91f-b32f-4b8f-ae18-126c7bbe6e7d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Annunaki-12b/1762652579.921084", - "retrieved_timestamp": "1762652579.921084", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Annunaki-12b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Annunaki-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872070550583563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5498969437971782 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44087499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3720910904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_BigTalker-Lite-8B/befea823-7dc5-4e69-81e3-e75c4ff117ac.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_BigTalker-Lite-8B/befea823-7dc5-4e69-81e3-e75c4ff117ac.json deleted file mode 100644 index 5a16fb39d85d2ca58f364a9e8979c4a55bb25e0f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_BigTalker-Lite-8B/befea823-7dc5-4e69-81e3-e75c4ff117ac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_BigTalker-Lite-8B/1762652579.92133", - "retrieved_timestamp": "1762652579.921331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/BigTalker-Lite-8B", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/BigTalker-Lite-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3689222374411007 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5308138241234059 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42084375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34308510638297873 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chatty-Harry_V2.0/f2dcc214-e25c-4c73-97f0-4e47304df09b.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chatty-Harry_V2.0/f2dcc214-e25c-4c73-97f0-4e47304df09b.json deleted file mode 100644 index 2691536ab97c4170c78a69c855ac7979c81d63d6..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chatty-Harry_V2.0/f2dcc214-e25c-4c73-97f0-4e47304df09b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Chatty-Harry_V2.0/1762652579.921529", - "retrieved_timestamp": "1762652579.92153", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Chatty-Harry_V2.0", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Chatty-Harry_V2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3325520729442324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5318928049062546 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13897280966767372 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40782291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36826795212765956 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chatty-Harry_V3.0/b9b23a78-beea-4c4b-8bb8-d5a18a05ffce.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chatty-Harry_V3.0/b9b23a78-beea-4c4b-8bb8-d5a18a05ffce.json deleted file mode 100644 index f39117824d2dca87118b4eaa5965426bc69cacf9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chatty-Harry_V3.0/b9b23a78-beea-4c4b-8bb8-d5a18a05ffce.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Chatty-Harry_V3.0/1762652579.9217439", - "retrieved_timestamp": "1762652579.9217439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Chatty-Harry_V3.0", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Chatty-Harry_V3.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36749823800848413 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5526193453608234 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44084375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37017952127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chronos-Prism_V1.0/13bb7db2-9d89-4dce-950a-14ccfb3492aa.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chronos-Prism_V1.0/13bb7db2-9d89-4dce-950a-14ccfb3492aa.json deleted file mode 100644 index 98768898290251cf239ab2034de637748bd54bf7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chronos-Prism_V1.0/13bb7db2-9d89-4dce-950a-14ccfb3492aa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Chronos-Prism_V1.0/1762652579.921948", - "retrieved_timestamp": "1762652579.921948", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Chronos-Prism_V1.0", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Chronos-Prism_V1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3259329689667859 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5554188807010064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4262708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36727061170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-10B-Harmony/ff136a9d-7e29-4a44-86be-c69bc115102e.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-10B-Harmony/ff136a9d-7e29-4a44-86be-c69bc115102e.json deleted file mode 100644 index 99f4ed33516393ea9e2a59af4caa493ad2da6bcf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-10B-Harmony/ff136a9d-7e29-4a44-86be-c69bc115102e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Distill-Q2.5-10B-Harmony/1762652579.9225988", - "retrieved_timestamp": "1762652579.9226", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/DS-R1-Distill-Q2.5-10B-Harmony", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/DS-R1-Distill-Q2.5-10B-Harmony" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17508211545366295 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2643276743386568 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2105704697986577 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31276041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11727061170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 10.366 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-14B-Harmony_V0.1/63bc0215-741c-48ab-8ce3-d4c036c74a42.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-14B-Harmony_V0.1/63bc0215-741c-48ab-8ce3-d4c036c74a42.json deleted file mode 100644 index 58fd8b1a14a913d01a9360cc492f6a6e84d78c74..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-14B-Harmony_V0.1/63bc0215-741c-48ab-8ce3-d4c036c74a42.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Distill-Q2.5-14B-Harmony_V0.1/1762652579.9228039", - "retrieved_timestamp": "1762652579.922805", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4515042309959796 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5783379428926061 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5551359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5566875000000001 
- } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4601063829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-7B-RP/5515e597-5f9f-46eb-8d3f-0482bdd69715.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-7B-RP/5515e597-5f9f-46eb-8d3f-0482bdd69715.json deleted file mode 100644 index 47f835353d8e0073626924f8c5fe20852ae4dd3a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-7B-RP/5515e597-5f9f-46eb-8d3f-0482bdd69715.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Distill-Q2.5-7B-RP/1762652579.923009", - "retrieved_timestamp": "1762652579.923009", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/DS-R1-Distill-Q2.5-7B-RP", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/DS-R1-Distill-Q2.5-7B-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34454248061809334 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43834886662348205 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40302083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2890625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Dark-Chivalry_V1.0/ed3b441b-272c-4bc4-8839-aa6055a6ccbc.json 
b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Dark-Chivalry_V1.0/ed3b441b-272c-4bc4-8839-aa6055a6ccbc.json deleted file mode 100644 index c0b531106ed1c09511c865a90483115a2865eb47..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Dark-Chivalry_V1.0/ed3b441b-272c-4bc4-8839-aa6055a6ccbc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Dark-Chivalry_V1.0/1762652579.923868", - "retrieved_timestamp": "1762652579.923869", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Dark-Chivalry_V1.0", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Dark-Chivalry_V1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325700253106203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4974207759950637 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4181770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34441489361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-DarkPlanet-Allades-8B/2d57a30c-8a0e-4f18-bb2d-6bf4536bbc86.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-DarkPlanet-Allades-8B/2d57a30c-8a0e-4f18-bb2d-6bf4536bbc86.json deleted file mode 100644 index 47a5d180b443c2ee151329d8b917de7f4b1b0879..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-DarkPlanet-Allades-8B/2d57a30c-8a0e-4f18-bb2d-6bf4536bbc86.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Distilled-DarkPlanet-Allades-8B/1762652579.9240808", - "retrieved_timestamp": 
"1762652579.9240808", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Distilled-DarkPlanet-Allades-8B", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Distilled-DarkPlanet-Allades-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3460163477351206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4633948672868899 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29014295212765956 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-DarkPlanet-Allades-8B_TIES/9bff68b3-82a4-49b5-90a7-3c0038ddc35a.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-DarkPlanet-Allades-8B_TIES/9bff68b3-82a4-49b5-90a7-3c0038ddc35a.json deleted file mode 100644 index 4d82fd62f17242cf6c915feaf1d01ede83532feb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-DarkPlanet-Allades-8B_TIES/9bff68b3-82a4-49b5-90a7-3c0038ddc35a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Distilled-DarkPlanet-Allades-8B_TIES/1762652579.924282", - "retrieved_timestamp": "1762652579.924282", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Distilled-DarkPlanet-Allades-8B_TIES", - "developer": "Triangle104", - "inference_platform": 
"unknown", - "id": "Triangle104/Distilled-DarkPlanet-Allades-8B_TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3891807071902552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5041556910813355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868020833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.340093085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-Whiskey-8b/cf34d222-197f-4d3d-9786-fb5c019f2552.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-Whiskey-8b/cf34d222-197f-4d3d-9786-fb5c019f2552.json deleted file mode 100644 index 45895340c177fcb179c768c88cb93c8e6a0ed459..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-Whiskey-8b/cf34d222-197f-4d3d-9786-fb5c019f2552.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Distilled-Whiskey-8b/1762652579.924494", - "retrieved_timestamp": "1762652579.9244952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Distilled-Whiskey-8b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Distilled-Whiskey-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34476743928332376 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5027820189600739 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2545317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41721874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3366855053191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Hermes3-L3.1-DirtyHarry-8B/a8086735-c7a7-48b5-9219-829e288040f5.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Hermes3-L3.1-DirtyHarry-8B/a8086735-c7a7-48b5-9219-829e288040f5.json deleted file mode 100644 index 5ce5d658b60e96605a7b2ed20e0ebea89d542233..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Hermes3-L3.1-DirtyHarry-8B/a8086735-c7a7-48b5-9219-829e288040f5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Hermes3-L3.1-DirtyHarry-8B/1762652579.925645", - "retrieved_timestamp": "1762652579.925645", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Hermes3-L3.1-DirtyHarry-8B", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Hermes3-L3.1-DirtyHarry-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32423414318452815 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5066388671914118 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4068958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338597074468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Herodotos-14B/271dbfc3-d9cf-4cb7-b1c0-175f016ed32b.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Herodotos-14B/271dbfc3-d9cf-4cb7-b1c0-175f016ed32b.json deleted file mode 100644 index d464b5604518eda494da9fe8acb6cec084c8e7fa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Herodotos-14B/271dbfc3-d9cf-4cb7-b1c0-175f016ed32b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Herodotos-14B/1762652579.925863", - "retrieved_timestamp": "1762652579.925863", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Herodotos-14B", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Herodotos-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4667415790103592 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6435044367110887 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4795416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5290059840425532 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Herodotos-14B_V0.1/3c6d1b1b-465a-4b97-83ed-d2ebd27a905e.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Herodotos-14B_V0.1/3c6d1b1b-465a-4b97-83ed-d2ebd27a905e.json deleted file mode 100644 index ad2344f28dc846b495bf4eae80925710c7b02e3c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Herodotos-14B_V0.1/3c6d1b1b-465a-4b97-83ed-d2ebd27a905e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Herodotos-14B_V0.1/1762652579.9261289", - "retrieved_timestamp": "1762652579.926136", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Herodotos-14B_V0.1", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Herodotos-14B_V0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1878715142488597 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30172239497895226 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22399328859060402 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3683854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11643949468085106 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_L3.1-8B-Dusky-Ink/4eed8b1b-591d-403b-96f4-c6db11e8b234.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_L3.1-8B-Dusky-Ink/4eed8b1b-591d-403b-96f4-c6db11e8b234.json deleted file mode 100644 index fe3d622745726bf064ac319bb9f868a26dba286a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_L3.1-8B-Dusky-Ink/4eed8b1b-591d-403b-96f4-c6db11e8b234.json +++ 
/dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_L3.1-8B-Dusky-Ink/1762652579.926589", - "retrieved_timestamp": "1762652579.92659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/L3.1-8B-Dusky-Ink", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/L3.1-8B-Dusky-Ink" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4529780981130068 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097902234872148 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4223958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36826795212765956 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_L3.1-8B-Dusky-Ink_v0.r1/a43e1d8d-8a9e-445b-9023-fc6d4a41fcfc.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_L3.1-8B-Dusky-Ink_v0.r1/a43e1d8d-8a9e-445b-9023-fc6d4a41fcfc.json deleted file mode 100644 index 834e51d963189dcd561305c3f6b8d7514818b320..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_L3.1-8B-Dusky-Ink_v0.r1/a43e1d8d-8a9e-445b-9023-fc6d4a41fcfc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_L3.1-8B-Dusky-Ink_v0.r1/1762652579.926839", - "retrieved_timestamp": "1762652579.92684", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"Triangle104/L3.1-8B-Dusky-Ink_v0.r1", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/L3.1-8B-Dusky-Ink_v0.r1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19848779017451473 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43372778578458115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3988333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.320561835106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_LThreePointOne-8B-HermesBlackroot/d1c3467e-6189-4d6f-bedb-8c51fa8bfde6.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_LThreePointOne-8B-HermesBlackroot/d1c3467e-6189-4d6f-bedb-8c51fa8bfde6.json deleted file mode 100644 index 5699be2d67390ba35a65fae7cae1bc1b9a58ec5c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_LThreePointOne-8B-HermesBlackroot/d1c3467e-6189-4d6f-bedb-8c51fa8bfde6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_LThreePointOne-8B-HermesBlackroot/1762652579.927087", - "retrieved_timestamp": "1762652579.927088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/LThreePointOne-8B-HermesBlackroot", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/LThreePointOne-8B-HermesBlackroot" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17920340252751588 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4998333246909241 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3585520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32845744680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_LThreePointOne-8B-HermesInk/1bb3c61f-2f72-4486-87ef-1e6d5ce58478.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_LThreePointOne-8B-HermesInk/1bb3c61f-2f72-4486-87ef-1e6d5ce58478.json deleted file mode 100644 index 8d6579eb91d8d63c9a0ff1c645e438b0cc71e24e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_LThreePointOne-8B-HermesInk/1bb3c61f-2f72-4486-87ef-1e6d5ce58478.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_LThreePointOne-8B-HermesInk/1762652579.927316", - "retrieved_timestamp": "1762652579.927316", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/LThreePointOne-8B-HermesInk", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/LThreePointOne-8B-HermesInk" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4031192790684273 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5222765555856439 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17220543806646527 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": 
{ - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4129375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34674202127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-1.5b/26810cc0-541f-4ca5-b76e-f1a63baa61f6.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-1.5b/26810cc0-541f-4ca5-b76e-f1a63baa61f6.json deleted file mode 100644 index 7490e407426635af5d0c88791e821d9cec37b2e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-1.5b/26810cc0-541f-4ca5-b76e-f1a63baa61f6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-1.5b/1762652579.9280179", - "retrieved_timestamp": "1762652579.9280179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Minerva-1.5b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Minerva-1.5b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2694295580171722 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4025709779119226 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3655 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.269780585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-1.5b_V0.2/fc5be34b-0fad-4fce-9df1-851e4fd3119d.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-1.5b_V0.2/fc5be34b-0fad-4fce-9df1-851e4fd3119d.json deleted file mode 100644 index 48daaee8eac1ae7c7cb2a9c1a6f0263f27751e5b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-1.5b_V0.2/fc5be34b-0fad-4fce-9df1-851e4fd3119d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-1.5b_V0.2/1762652579.928302", - "retrieved_timestamp": "1762652579.928303", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Minerva-1.5b_V0.2", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Minerva-1.5b_V0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3083474071020448 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3989042137094949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3960104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29105718085106386 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-10b/848ac6f9-2bb5-48fe-821a-83f28da91f92.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-10b/848ac6f9-2bb5-48fe-821a-83f28da91f92.json deleted file mode 100644 index 
b3a7843e6544aafa353e2c03b3e15b8062078a73..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-10b/848ac6f9-2bb5-48fe-821a-83f28da91f92.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-10b/1762652579.928542", - "retrieved_timestamp": "1762652579.928543", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Minerva-10b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Minerva-10b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1878715142488597 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4462036157096501 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36270833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23179853723404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 10.067 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-14b-V0.1/fc4971f4-983d-40f9-810a-16ed998c1dad.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-14b-V0.1/fc4971f4-983d-40f9-810a-16ed998c1dad.json deleted file mode 100644 index ed936019ccdad742e7ed9497fd7a61fea930715b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-14b-V0.1/fc4971f4-983d-40f9-810a-16ed998c1dad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-14b-V0.1/1762652579.92906", - "retrieved_timestamp": "1762652579.9290612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Minerva-14b-V0.1", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Minerva-14b-V0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0861292481726264 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6089792638423274 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30513595166163143 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36577181208053694 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47002083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5118018617021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-14b/54093f2d-15c3-465e-b876-5e4027deeb19.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-14b/54093f2d-15c3-465e-b876-5e4027deeb19.json deleted file mode 100644 index 81a09e96cd29d270a39fb0ded74027dc508cc63b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-14b/54093f2d-15c3-465e-b876-5e4027deeb19.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-14b/1762652579.928819", - "retrieved_timestamp": "1762652579.928819", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Minerva-14b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Minerva-14b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3467898509288687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6300829439447851 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30513595166163143 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.476625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193650265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-7b/aad7ed5c-d51d-46d7-af15-9c0447a02036.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-7b/aad7ed5c-d51d-46d7-af15-9c0447a02036.json deleted file mode 100644 index fc72c1ba671f71eef2e63cf6216d4ad2e5b898e6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-7b/aad7ed5c-d51d-46d7-af15-9c0447a02036.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-7b/1762652579.929375", - "retrieved_timestamp": "1762652579.929377", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Minerva-7b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Minerva-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724196243744376 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5498400501314606 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4143333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44439827127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-8b/08cc58ae-b1dc-489c-ba25-338bb11db2ee.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-8b/08cc58ae-b1dc-489c-ba25-338bb11db2ee.json deleted file mode 100644 index 9f68b562face6adb6fdceedf74ac9820b33565ab..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-8b/08cc58ae-b1dc-489c-ba25-338bb11db2ee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-8b/1762652579.9296892", - "retrieved_timestamp": "1762652579.9296901", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Minerva-8b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Minerva-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17208451353519771 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46686093526780637 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4272916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30892619680851063 - } - } - ], 
- "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pans_Gutenbergum_V0.1/2a6af60c-eb46-46ae-8140-d050b48069ae.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pans_Gutenbergum_V0.1/2a6af60c-eb46-46ae-8140-d050b48069ae.json deleted file mode 100644 index 49cf2abcdd1b6e1d151aca754332a2ffd6363e0a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pans_Gutenbergum_V0.1/2a6af60c-eb46-46ae-8140-d050b48069ae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Pans_Gutenbergum_V0.1/1762652579.9304042", - "retrieved_timestamp": "1762652579.9304051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Pans_Gutenbergum_V0.1", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Pans_Gutenbergum_V0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.309696050922663 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5541091780465247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4528125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3696808510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pans_Gutenbergum_V0.2/f9eef8a7-1f23-46f1-b57a-062ffd1b81a1.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pans_Gutenbergum_V0.2/f9eef8a7-1f23-46f1-b57a-062ffd1b81a1.json deleted file mode 100644 index 495dc3a495d9c25bda6498f5362fff7c10c2f8d1..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pans_Gutenbergum_V0.2/f9eef8a7-1f23-46f1-b57a-062ffd1b81a1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Pans_Gutenbergum_V0.2/1762652579.93062", - "retrieved_timestamp": "1762652579.930621", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Pans_Gutenbergum_V0.2", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Pans_Gutenbergum_V0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3215113676157041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.55257930562769 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46732291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3585438829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pantheon_ChatWaifu_V0.2/b57a86fa-8994-4004-a79d-d6da64e64b4d.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pantheon_ChatWaifu_V0.2/b57a86fa-8994-4004-a79d-d6da64e64b4d.json deleted file mode 100644 index 1de2f54d875b78db58c6431379f87e1dbde29d2d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pantheon_ChatWaifu_V0.2/b57a86fa-8994-4004-a79d-d6da64e64b4d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Pantheon_ChatWaifu_V0.2/1762652579.930828", - "retrieved_timestamp": "1762652579.930829", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Pantheon_ChatWaifu_V0.2", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Pantheon_ChatWaifu_V0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2682803849341968 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5531574435698693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47551041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34424867021276595 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-14B-Instruct-1M-Harmony/1cf0506b-dbdd-4f7e-abf5-d812763a722e.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-14B-Instruct-1M-Harmony/1cf0506b-dbdd-4f7e-abf5-d812763a722e.json deleted file mode 100644 index 1ca25d444c1b6e96578cd5ae21618ed6a865c0cd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-14B-Instruct-1M-Harmony/1cf0506b-dbdd-4f7e-abf5-d812763a722e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-14B-Instruct-1M-Harmony/1762652579.93199", - "retrieved_timestamp": "1762652579.931991", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Q2.5-14B-Instruct-1M-Harmony", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-14B-Instruct-1M-Harmony" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.5986327389105351 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6338808682301471 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4795416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5074800531914894 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-AthensCOT/54a29a68-c69a-4b49-a87a-cb93c459146a.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-AthensCOT/54a29a68-c69a-4b49-a87a-cb93c459146a.json deleted file mode 100644 index eea53246e040c4b318766d165a8a8de061b4e2df..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-AthensCOT/54a29a68-c69a-4b49-a87a-cb93c459146a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-AthensCOT/1762652579.9322", - "retrieved_timestamp": "1762652579.932201", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Q2.5-AthensCOT", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-AthensCOT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45727447616767947 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5541692533534606 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29154078549848944 - } - }, - { - "evaluation_name": "GPQA", 
- "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4578333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4379155585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-CodeR1-3B/74342d21-8eac-494c-95b9-4df1e828473b.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-CodeR1-3B/74342d21-8eac-494c-95b9-4df1e828473b.json deleted file mode 100644 index dc60e50d3bbcd2ba070057e3ec019d1dae01fb37..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-CodeR1-3B/74342d21-8eac-494c-95b9-4df1e828473b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-CodeR1-3B/1762652579.932402", - "retrieved_timestamp": "1762652579.9324028", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Q2.5-CodeR1-3B", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-CodeR1-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35875587884590665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4660844324968853 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43154166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978723404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-EVACOT-7b/972dfbcf-a5d0-4f9f-a39c-089c30ac91ab.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-EVACOT-7b/972dfbcf-a5d0-4f9f-a39c-089c30ac91ab.json deleted file mode 100644 index 6c3614d4c6bba52e1744008f15295fba0ff03d77..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-EVACOT-7b/972dfbcf-a5d0-4f9f-a39c-089c30ac91ab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-EVACOT-7b/1762652579.9326148", - "retrieved_timestamp": "1762652579.932616", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Q2.5-EVACOT-7b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-EVACOT-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5784241368457914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5505524946794311 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2824773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4498645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43309507978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-EvaHumane-RP/5146b3c9-9fdb-4a4e-a687-4bcf44b92309.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-EvaHumane-RP/5146b3c9-9fdb-4a4e-a687-4bcf44b92309.json deleted file mode 100644 index 
88a46147c893f5e8d3db6058403edfbb9c1a0c4c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-EvaHumane-RP/5146b3c9-9fdb-4a4e-a687-4bcf44b92309.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-EvaHumane-RP/1762652579.932837", - "retrieved_timestamp": "1762652579.932837", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Q2.5-EvaHumane-RP", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-EvaHumane-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3676234613048932 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5328196297646768 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29229607250755285 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42763541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4412400265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-Humane-RP/697ad115-9040-42e4-b94b-529ab27011ee.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-Humane-RP/697ad115-9040-42e4-b94b-529ab27011ee.json deleted file mode 100644 index 1790025ec80f76251224031bcc670f6d908fd7a3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-Humane-RP/697ad115-9040-42e4-b94b-529ab27011ee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-Humane-RP/1762652579.933056", - "retrieved_timestamp": "1762652579.933057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Q2.5-Humane-RP", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-Humane-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4411627814199657 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5649289292164736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3391238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4528125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44921875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-Instruct-1M_Harmony/f4cbe998-8c9f-47c1-a267-5831a40e4cf2.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-Instruct-1M_Harmony/f4cbe998-8c9f-47c1-a267-5831a40e4cf2.json deleted file mode 100644 index b384db78eea43ec8e0c2d39c36abbc1f82215e03..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-Instruct-1M_Harmony/f4cbe998-8c9f-47c1-a267-5831a40e4cf2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-Instruct-1M_Harmony/1762652579.933266", - "retrieved_timestamp": "1762652579.9332669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Q2.5-Instruct-1M_Harmony", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-Instruct-1M_Harmony" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.6038034636985421 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5373243549676157 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46878125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43658577127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-R1-3B/a4e4a936-5203-4a9d-a698-417cc9da866f.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-R1-3B/a4e4a936-5203-4a9d-a698-417cc9da866f.json deleted file mode 100644 index 1dc59aa094cf5c7a7736a91e92117610a0b64986..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-R1-3B/a4e4a936-5203-4a9d-a698-417cc9da866f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-R1-3B/1762652579.933473", - "retrieved_timestamp": "1762652579.933474", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Q2.5-R1-3B", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-R1-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4213542290012722 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48124304786769817 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43197916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38131648936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-R1-7B/302fa968-5d2d-4750-a1e6-c87534c1eafa.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-R1-7B/302fa968-5d2d-4750-a1e6-c87534c1eafa.json deleted file mode 100644 index 317443a11b4365affa9f8a58aba4f8ce83fae67c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-R1-7B/302fa968-5d2d-4750-a1e6-c87534c1eafa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-R1-7B/1762652579.933674", - "retrieved_timestamp": "1762652579.933675", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Q2.5-R1-7B", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Q2.5-R1-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1346150436397647 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30065625818799685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3607291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1180186170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Robo-Gutenberg_V1.0/d891d79a-1ec2-44e3-83cd-c28739aecd6e.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Robo-Gutenberg_V1.0/d891d79a-1ec2-44e3-83cd-c28739aecd6e.json deleted file mode 100644 index 9eda526cd71f1ea1d24e1d379bb3890c2b78a33c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Robo-Gutenberg_V1.0/d891d79a-1ec2-44e3-83cd-c28739aecd6e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Robo-Gutenberg_V1.0/1762652579.9338748", - "retrieved_timestamp": "1762652579.933876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Robo-Gutenberg_V1.0", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Robo-Gutenberg_V1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6007559940956662 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.653716560941194 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4561933534743202 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47436458333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5391456117021277 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rocinante-Prism_V2.0/9f32b229-a2d5-409b-98d2-65681616aff4.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rocinante-Prism_V2.0/9f32b229-a2d5-409b-98d2-65681616aff4.json deleted file mode 100644 index 
11ce2471dc3ae48656a6e31f192f410d24bfc5fa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rocinante-Prism_V2.0/9f32b229-a2d5-409b-98d2-65681616aff4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Rocinante-Prism_V2.0/1762652579.9340868", - "retrieved_timestamp": "1762652579.9340868", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Rocinante-Prism_V2.0", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Rocinante-Prism_V2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2616103051015749 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5361246041982355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640292553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rocinante-Prism_V2.1/7a93ddc1-8694-4b16-8183-1b7f46dfba92.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rocinante-Prism_V2.1/7a93ddc1-8694-4b16-8183-1b7f46dfba92.json deleted file mode 100644 index 69ffcd9e49e42bb3d96cc2bb5e72d6203f4451b5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rocinante-Prism_V2.1/7a93ddc1-8694-4b16-8183-1b7f46dfba92.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Rocinante-Prism_V2.1/1762652579.934289", - "retrieved_timestamp": "1762652579.93429", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Rocinante-Prism_V2.1", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Rocinante-Prism_V2.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25584005992987496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5332676401860506 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44896874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3651097074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rombos-Novasky-7B_V1c/a06dc6ef-5d16-402a-a855-b7feec423aa5.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rombos-Novasky-7B_V1c/a06dc6ef-5d16-402a-a855-b7feec423aa5.json deleted file mode 100644 index 865c5a8690015a054b935a2ce0f99b9348df7d9e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rombos-Novasky-7B_V1c/a06dc6ef-5d16-402a-a855-b7feec423aa5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Rombos-Novasky-7B_V1c/1762652579.934721", - "retrieved_timestamp": "1762652579.934722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Rombos-Novasky-7B_V1c", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Rombos-Novasky-7B_V1c" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.40801517750679306 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4349247829177707 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44645833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27376994680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Set-70b/e25fa684-c237-4bce-8498-7bdfaac970a9.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Set-70b/e25fa684-c237-4bce-8498-7bdfaac970a9.json deleted file mode 100644 index 5bda0314aeb1192647aebf0f63e222a55a27fce0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Set-70b/e25fa684-c237-4bce-8498-7bdfaac970a9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Set-70b/1762652579.934931", - "retrieved_timestamp": "1762652579.934931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Set-70b", - "developer": "Triangle104", - "inference_platform": "unknown", - "id": "Triangle104/Set-70b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7642954028643998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.70142939330013 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640483383685801 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4463087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46956250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5442154255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-0.5-7B-Instruct/df3de449-9abc-4f0a-ba6e-caa48720893a.json b/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-0.5-7B-Instruct/df3de449-9abc-4f0a-ba6e-caa48720893a.json deleted file mode 100644 index 349433bf13f42b5e35d56ab6dd47c80a3ff8b412..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-0.5-7B-Instruct/df3de449-9abc-4f0a-ba6e-caa48720893a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-0.5-7B-Instruct/1762652579.935141", - "retrieved_timestamp": "1762652579.9351418", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Tsunami-th/Tsunami-0.5-7B-Instruct", - "developer": "Tsunami-th", - "inference_platform": "unknown", - "id": "Tsunami-th/Tsunami-0.5-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7400153814102137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.552369427738073 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42571875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44132313829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-0.5x-7B-Instruct/fec678b9-c51b-4945-8d4f-f06af6528227.json b/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-0.5x-7B-Instruct/fec678b9-c51b-4945-8d4f-f06af6528227.json deleted file mode 100644 index ba92c73ae7ad3ff0b3c71b1d8d194dd325c897da..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-0.5x-7B-Instruct/fec678b9-c51b-4945-8d4f-f06af6528227.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-0.5x-7B-Instruct/1762652579.9353971", - "retrieved_timestamp": "1762652579.9353979", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Tsunami-th/Tsunami-0.5x-7B-Instruct", - "developer": "Tsunami-th", - "inference_platform": "unknown", - "id": "Tsunami-th/Tsunami-0.5x-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.709915247099917 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5592865858560252 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206948640483384 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46667708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44581117021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-1.0-14B-Instruct/11262698-480b-425b-b013-f362fae2f254.json 
b/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-1.0-14B-Instruct/11262698-480b-425b-b013-f362fae2f254.json deleted file mode 100644 index 6bd0f1fad4e366cfd518280be0038f23c6e62069..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-1.0-14B-Instruct/11262698-480b-425b-b013-f362fae2f254.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-1.0-14B-Instruct/1762652579.935597", - "retrieved_timestamp": "1762652579.935597", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Tsunami-th/Tsunami-1.0-14B-Instruct", - "developer": "Tsunami-th", - "inference_platform": "unknown", - "id": "Tsunami-th/Tsunami-1.0-14B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7829049145157072 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6438763263011559 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45845921450151056 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44593750000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5248503989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-1.0-7B-Instruct/ccffe03b-c166-48de-8516-8253b2c2f96e.json b/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-1.0-7B-Instruct/ccffe03b-c166-48de-8516-8253b2c2f96e.json deleted file mode 100644 index 8ee9ddac452cb1ee0b4b871132b68061c13e0b83..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-1.0-7B-Instruct/ccffe03b-c166-48de-8516-8253b2c2f96e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-1.0-7B-Instruct/1762652579.9358132", - "retrieved_timestamp": "1762652579.9358132", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Tsunami-th/Tsunami-1.0-7B-Instruct", - "developer": "Tsunami-th", - "inference_platform": "unknown", - "id": "Tsunami-th/Tsunami-1.0-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.730872972601586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.549071195618326 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4335347432024169 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44928125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4424035904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter1/07af3512-a045-435e-a965-8daa0836905d.json b/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter1/07af3512-a045-435e-a965-8daa0836905d.json deleted file mode 100644 index 65059a138c42a4fba93b2b17d1b3036afc754665..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter1/07af3512-a045-435e-a965-8daa0836905d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter1/1762652579.9367309", - "retrieved_timestamp": "1762652579.9367318", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1", - "developer": "UCLA-AGI", - "inference_platform": "unknown", - "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7298988904994304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5057890691082708 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37109375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter2/0c5c315f-63c4-427e-a307-1422a197895c.json b/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter2/0c5c315f-63c4-427e-a307-1422a197895c.json deleted file mode 100644 index 6a10c1cf09da116c3340b81d9c5953bf9865437e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter2/0c5c315f-63c4-427e-a307-1422a197895c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter2/1762652579.93697", - "retrieved_timestamp": "1762652579.936971", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2", - "developer": "UCLA-AGI", - "inference_platform": "unknown", - "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6988745417713889 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088696278852957 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35942708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36918218085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/49e095af-ed90-4e64-b476-4fc62d6e6997.json b/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/49e095af-ed90-4e64-b476-4fc62d6e6997.json deleted file mode 100644 index a16ae4e325bd5cba6d58ecf4ecee6921feb49f4a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/49e095af-ed90-4e64-b476-4fc62d6e6997.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/1762652579.937367", - "retrieved_timestamp": "1762652579.9373682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", - "developer": "UCLA-AGI", - "inference_platform": "unknown", - "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.67029814226253 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5076407742830437 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": 
{ - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3647291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3657746010638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/d8d05a10-8889-40aa-b56f-365e0a12052c.json b/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/d8d05a10-8889-40aa-b56f-365e0a12052c.json deleted file mode 100644 index 595b41baa3825024472c4b8305e2492eda9b1bc4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/d8d05a10-8889-40aa-b56f-365e0a12052c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/1762652579.937166", - "retrieved_timestamp": "1762652579.9371672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", - "developer": "UCLA-AGI", - "inference_platform": "unknown", - "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6834122350917787 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50795799761689 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36606249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3644448138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Undi95/Undi95_MG-FinalMix-72B/3d3598fa-4b23-4ec6-a010-fb20232a5121.json b/leaderboard_data/HFOpenLLMv2/Undi95/Undi95_MG-FinalMix-72B/3d3598fa-4b23-4ec6-a010-fb20232a5121.json deleted file mode 100644 index 4e217e7075b583a2f155531b0d2b8a05dbcda377..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Undi95/Undi95_MG-FinalMix-72B/3d3598fa-4b23-4ec6-a010-fb20232a5121.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Undi95_MG-FinalMix-72B/1762652579.938925", - "retrieved_timestamp": "1762652579.938925", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Undi95/MG-FinalMix-72B", - "developer": "Undi95", - "inference_platform": "unknown", - "id": "Undi95/MG-FinalMix-72B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8013648231137825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6973017446417747 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48227083333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.542719414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/V3N0M/V3N0M_Jenna-Tiny-2.0/d9785857-b164-4d38-8d03-0e03e2d0fbf5.json b/leaderboard_data/HFOpenLLMv2/V3N0M/V3N0M_Jenna-Tiny-2.0/d9785857-b164-4d38-8d03-0e03e2d0fbf5.json deleted file mode 100644 index e54b54f28fedb19d180ce28c61aaf3dc2608127f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/V3N0M/V3N0M_Jenna-Tiny-2.0/d9785857-b164-4d38-8d03-0e03e2d0fbf5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/V3N0M_Jenna-Tiny-2.0/1762652579.9394162", - 
"retrieved_timestamp": "1762652579.9394171", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "V3N0M/Jenna-Tiny-2.0", - "developer": "V3N0M", - "inference_platform": "unknown", - "id": "V3N0M/Jenna-Tiny-2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2309361383351729 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31479264061817097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33666666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.631 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3-SauerkrautLM-70b-Instruct/eb8adbdf-2cfb-4e9e-8f75-ce2734907725.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3-SauerkrautLM-70b-Instruct/eb8adbdf-2cfb-4e9e-8f75-ce2734907725.json deleted file mode 100644 index 4f9743b7fa9f1a1deb2a6aa0e2f1687aa7882573..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3-SauerkrautLM-70b-Instruct/eb8adbdf-2cfb-4e9e-8f75-ce2734907725.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3-SauerkrautLM-70b-Instruct/1762652579.939689", - "retrieved_timestamp": "1762652579.939689", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": 
"VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8044621604010691 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6663247245334951 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2280966767371601 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43393750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5392287234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3-SauerkrautLM-8b-Instruct/ad99531d-4d52-4175-8ebd-cb172b4577de.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3-SauerkrautLM-8b-Instruct/ad99531d-4d52-4175-8ebd-cb172b4577de.json deleted file mode 100644 index 48507fccb9c754bb3954b304c398e8d8fe1297cf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3-SauerkrautLM-8b-Instruct/ad99531d-4d52-4175-8ebd-cb172b4577de.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3-SauerkrautLM-8b-Instruct/1762652579.93995", - "retrieved_timestamp": "1762652579.9399512", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.744536718130117 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.494337579362695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42410416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3857214095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3.1-SauerkrautLM-70b-Instruct/2e3eca4b-4c15-4b3b-8c44-3a23312a0797.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3.1-SauerkrautLM-70b-Instruct/2e3eca4b-4c15-4b3b-8c44-3a23312a0797.json deleted file mode 100644 index c13a18d5fe24279d5d6db5a2c12507f2cc870077..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3.1-SauerkrautLM-70b-Instruct/2e3eca4b-4c15-4b3b-8c44-3a23312a0797.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3.1-SauerkrautLM-70b-Instruct/1762652579.940237", - "retrieved_timestamp": "1762652579.940238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8656365111238181 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7006249194404001 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3693353474320242 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4710833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5334940159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3.1-SauerkrautLM-8b-Instruct/aa425d3e-e363-46bf-a5fb-cbf524657e85.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3.1-SauerkrautLM-8b-Instruct/aa425d3e-e363-46bf-a5fb-cbf524657e85.json deleted file mode 100644 index b4825eef4ac6ab963192865641ebcf014c0934d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3.1-SauerkrautLM-8b-Instruct/aa425d3e-e363-46bf-a5fb-cbf524657e85.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3.1-SauerkrautLM-8b-Instruct/1762652579.9404852", - "retrieved_timestamp": "1762652579.940486", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8017393848322452 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5114932190011187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19410876132930513 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4148020833333333 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3890458776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-1.5b/22ae39ae-883c-43a7-abbe-3213b9035b58.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-1.5b/22ae39ae-883c-43a7-abbe-3213b9035b58.json deleted file mode 100644 index 5bf53c22c027b1715974abb40941be34ff875a88..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-1.5b/22ae39ae-883c-43a7-abbe-3213b9035b58.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-1.5b/1762652579.940706", - "retrieved_timestamp": "1762652579.940707", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-1.5b", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-1.5b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24040324117785256 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3703912164863146 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37390625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21509308510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-7b-HerO/be74b2d6-28b9-4227-b0ec-fbad4b7dada6.json 
b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-7b-HerO/be74b2d6-28b9-4227-b0ec-fbad4b7dada6.json deleted file mode 100644 index 8f4b3ee9f12e691bf9c603cfe2f41d507be3e541..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-7b-HerO/be74b2d6-28b9-4227-b0ec-fbad4b7dada6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-7b-HerO/1762652579.940931", - "retrieved_timestamp": "1762652579.940931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-7b-HerO", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-7b-HerO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.534610389322553 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49044349935812964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39238541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30460438829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-7b-LaserChat/35512aeb-611a-46a8-849e-442fc3fcc23a.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-7b-LaserChat/35512aeb-611a-46a8-849e-442fc3fcc23a.json deleted file mode 100644 index 1dbb1a06d9bb57ab6dfd054808f8489d2434f68b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-7b-LaserChat/35512aeb-611a-46a8-849e-442fc3fcc23a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-7b-LaserChat/1762652579.941142", - 
"retrieved_timestamp": "1762652579.941143", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-7b-LaserChat", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-7b-LaserChat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5987823419637672 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45432707993295685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4148020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3304521276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-Mixtral-8x7B-Instruct/f105fe57-632a-4e3b-bbcb-f063f2e10874.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-Mixtral-8x7B-Instruct/f105fe57-632a-4e3b-bbcb-f063f2e10874.json deleted file mode 100644 index 62a26c69bf043538ed09090010e094a1df4f0618..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-Mixtral-8x7B-Instruct/f105fe57-632a-4e3b-bbcb-f063f2e10874.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Mixtral-8x7B-Instruct/1762652579.9418082", - "retrieved_timestamp": "1762652579.941809", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct", - "developer": 
"VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5601891869129465 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5277342269858817 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42041666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3650265957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-Nemo-12b-Instruct/b5db7846-f777-4fa8-86e9-f09fdee1dfee.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-Nemo-12b-Instruct/b5db7846-f777-4fa8-86e9-f09fdee1dfee.json deleted file mode 100644 index bab93b0c74c3ece945682eb9fd66c4e9d3a45dfd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-Nemo-12b-Instruct/b5db7846-f777-4fa8-86e9-f09fdee1dfee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Nemo-12b-Instruct/1762652579.942016", - "retrieved_timestamp": "1762652579.942017", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6112969144093228 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5214128647611115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4468958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33851396276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-SOLAR-Instruct/24fbb409-3b1a-4ed2-8866-547a7f02c5dc.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-SOLAR-Instruct/24fbb409-3b1a-4ed2-8866-547a7f02c5dc.json deleted file mode 100644 index faedb1cc8e9789d28f39316b02015d765fa6bd92..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-SOLAR-Instruct/24fbb409-3b1a-4ed2-8866-547a7f02c5dc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-SOLAR-Instruct/1762652579.942544", - "retrieved_timestamp": "1762652579.942544", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-SOLAR-Instruct", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-SOLAR-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49172085621705963 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5169447300097646 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { 
- "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3965416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31831781914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-v2-14b-DPO/e4b13fb1-11c0-4696-856f-de393fe2f8b2.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-v2-14b-DPO/e4b13fb1-11c0-4696-856f-de393fe2f8b2.json deleted file mode 100644 index 4c867ce645843148bb36d42e402fc527e9e39b66..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-v2-14b-DPO/e4b13fb1-11c0-4696-856f-de393fe2f8b2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-v2-14b-DPO/1762652579.943197", - "retrieved_timestamp": "1762652579.943197", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-v2-14b-DPO", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-v2-14b-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7411645544931892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6560374350756156 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3164652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.51171875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-v2-14b-SFT/d1b47391-f36e-4819-8093-5aff774dff94.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-v2-14b-SFT/d1b47391-f36e-4819-8093-5aff774dff94.json deleted file mode 100644 index 8d1efc2ca43a0939345d9731db493cb7f238ec80..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-v2-14b-SFT/d1b47391-f36e-4819-8093-5aff774dff94.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-v2-14b-SFT/1762652579.94341", - "retrieved_timestamp": "1762652579.9434109", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-v2-14b-SFT", - "developer": "VAGOsolutions", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-v2-14b-SFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6948529900663573 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6210355880693049 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3285498489425982 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5205285904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Vikhrmodels/Vikhrmodels_Vikhr-Llama3.1-8B-Instruct-R-21-09-24/b0332107-4b84-4c0a-b488-187fb3d534ae.json 
b/leaderboard_data/HFOpenLLMv2/Vikhrmodels/Vikhrmodels_Vikhr-Llama3.1-8B-Instruct-R-21-09-24/b0332107-4b84-4c0a-b488-187fb3d534ae.json deleted file mode 100644 index da902168e0de2e5579aa14d26a56a08d210a0f48..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Vikhrmodels/Vikhrmodels_Vikhr-Llama3.1-8B-Instruct-R-21-09-24/b0332107-4b84-4c0a-b488-187fb3d534ae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Vikhrmodels_Vikhr-Llama3.1-8B-Instruct-R-21-09-24/1762652579.9476302", - "retrieved_timestamp": "1762652579.9476311", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24", - "developer": "Vikhrmodels", - "inference_platform": "unknown", - "id": "Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.643145742186288 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.527224269970207 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3547207446808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Vikhrmodels/Vikhrmodels_Vikhr-Nemo-12B-Instruct-R-21-09-24/787cc582-61da-4afd-bfac-431377809fd9.json b/leaderboard_data/HFOpenLLMv2/Vikhrmodels/Vikhrmodels_Vikhr-Nemo-12B-Instruct-R-21-09-24/787cc582-61da-4afd-bfac-431377809fd9.json deleted file mode 100644 index a299a4275539c1bdf95a3cb10bb5e6b43e1605f6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Vikhrmodels/Vikhrmodels_Vikhr-Nemo-12B-Instruct-R-21-09-24/787cc582-61da-4afd-bfac-431377809fd9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Vikhrmodels_Vikhr-Nemo-12B-Instruct-R-21-09-24/1762652579.947979", - "retrieved_timestamp": "1762652579.94798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24", - "developer": "Vikhrmodels", - "inference_platform": "unknown", - "id": "Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5999315150467426 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5212309052827618 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1714501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40730208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976063829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Bagel-Hermes-2x34B/5b614673-6566-4b82-bf7c-13268ebb1577.json b/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Bagel-Hermes-2x34B/5b614673-6566-4b82-bf7c-13268ebb1577.json deleted file mode 100644 index adbffd722342f7fcc5940dec24e32fe7af8c3234..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Bagel-Hermes-2x34B/5b614673-6566-4b82-bf7c-13268ebb1577.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Weyaxi_Bagel-Hermes-2x34B/1762652579.948213", - "retrieved_timestamp": "1762652579.948214", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Weyaxi/Bagel-Hermes-2x34B", - "developer": "Weyaxi", - "inference_platform": "unknown", 
- "id": "Weyaxi/Bagel-Hermes-2x34B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431532777474878 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49166555632285514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45166666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4588597074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 60.814 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Bagel-Hermes-34B-Slerp/28439ab5-0e5f-4dae-a98a-e0c1b743a8b0.json b/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Bagel-Hermes-34B-Slerp/28439ab5-0e5f-4dae-a98a-e0c1b743a8b0.json deleted file mode 100644 index 3336bed36967d1a31d963c49a32ff1c10dced888..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Bagel-Hermes-34B-Slerp/28439ab5-0e5f-4dae-a98a-e0c1b743a8b0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Weyaxi_Bagel-Hermes-34B-Slerp/1762652579.948482", - "retrieved_timestamp": "1762652579.948482", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Weyaxi/Bagel-Hermes-34B-Slerp", - "developer": "Weyaxi", - "inference_platform": "unknown", - "id": "Weyaxi/Bagel-Hermes-34B-Slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4602720780861448 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5921903605860047 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46220833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4703291223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Einstein-v4-7B/035c5e35-0ebe-4e91-a598-8d01688462a3.json b/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Einstein-v4-7B/035c5e35-0ebe-4e91-a598-8d01688462a3.json deleted file mode 100644 index dbfbe27a1723bba2e600b1e1518dc220b00a6052..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Einstein-v4-7B/035c5e35-0ebe-4e91-a598-8d01688462a3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v4-7B/1762652579.948704", - "retrieved_timestamp": "1762652579.948705", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Weyaxi/Einstein-v4-7B", - "developer": "Weyaxi", - "inference_platform": "unknown", - "id": "Weyaxi/Einstein-v4-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47081299839980145 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38494699692741774 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4681666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22589760638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_SauerkrautLM-UNA-SOLAR-Instruct/8ddec5bb-ab90-4c98-8482-a412e7735246.json b/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_SauerkrautLM-UNA-SOLAR-Instruct/8ddec5bb-ab90-4c98-8482-a412e7735246.json deleted file mode 100644 index 123ffa5e9fc3c51b02b1a44faf97fabef6dce32e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_SauerkrautLM-UNA-SOLAR-Instruct/8ddec5bb-ab90-4c98-8482-a412e7735246.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Weyaxi_SauerkrautLM-UNA-SOLAR-Instruct/1762652579.950165", - "retrieved_timestamp": "1762652579.950166", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct", - "developer": "Weyaxi", - "inference_platform": "unknown", - "id": "Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4573243438520902 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5166357112030591 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.397875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31532579787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff 
--git a/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-13B-V1.0/ab4f785b-779f-423b-9905-31a3b66dfeff.json b/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-13B-V1.0/ab4f785b-779f-423b-9905-31a3b66dfeff.json deleted file mode 100644 index 1c4d347f216945a8f62bfc4463701deec1855b14..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-13B-V1.0/ab4f785b-779f-423b-9905-31a3b66dfeff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/WizardLMTeam_WizardLM-13B-V1.0/1762652579.9503958", - "retrieved_timestamp": "1762652579.950397", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "WizardLMTeam/WizardLM-13B-V1.0", - "developer": "WizardLMTeam", - "inference_platform": "unknown", - "id": "WizardLMTeam/WizardLM-13B-V1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18504900331121424 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29134447696551025 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34971875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11660571808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-13B-V1.2/f9d2286c-ed89-4c23-b6a2-c623373331cd.json b/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-13B-V1.2/f9d2286c-ed89-4c23-b6a2-c623373331cd.json deleted file mode 100644 index 5e1bb1e528702255d017b1ca614e1447d5eb6f46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-13B-V1.2/f9d2286c-ed89-4c23-b6a2-c623373331cd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/WizardLMTeam_WizardLM-13B-V1.2/1762652579.950676", - "retrieved_timestamp": "1762652579.950676", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "WizardLMTeam/WizardLM-13B-V1.2", - "developer": "WizardLMTeam", - "inference_platform": "unknown", - "id": "WizardLMTeam/WizardLM-13B-V1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3392465325336773 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44619994364600474 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43784375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25191156914893614 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-70B-V1.0/8c4ff628-41b6-4769-a33e-b1dbffa913cf.json b/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-70B-V1.0/8c4ff628-41b6-4769-a33e-b1dbffa913cf.json deleted file mode 100644 index 428262959a27860a9a47031c41828e5a8e76437b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-70B-V1.0/8c4ff628-41b6-4769-a33e-b1dbffa913cf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/WizardLMTeam_WizardLM-70B-V1.0/1762652579.950908", - "retrieved_timestamp": "1762652579.950909", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "WizardLMTeam/WizardLM-70B-V1.0", - "developer": "WizardLMTeam", - "inference_platform": "unknown", 
- "id": "WizardLMTeam/WizardLM-70B-V1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49514288753839814 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5590366047184262 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43911458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34466422872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Wladastic/Wladastic_Mini-Think-Base-1B/5f9a01b0-632a-4ee4-aedc-279002c7496c.json b/leaderboard_data/HFOpenLLMv2/Wladastic/Wladastic_Mini-Think-Base-1B/5f9a01b0-632a-4ee4-aedc-279002c7496c.json deleted file mode 100644 index 740782842d2600496cca62108ce4047ff31d26e7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Wladastic/Wladastic_Mini-Think-Base-1B/5f9a01b0-632a-4ee4-aedc-279002c7496c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Wladastic_Mini-Think-Base-1B/1762652579.951128", - "retrieved_timestamp": "1762652579.9511292", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Wladastic/Mini-Think-Base-1B", - "developer": "Wladastic", - "inference_platform": "unknown", - "id": "Wladastic/Mini-Think-Base-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5588405430923283 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35741728048349203 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32748958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17719414893617022 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_Arcanum-12b/2d0a414f-1cf2-4ae3-951b-ed69d1ef883f.json b/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_Arcanum-12b/2d0a414f-1cf2-4ae3-951b-ed69d1ef883f.json deleted file mode 100644 index 29414f527fa8fd67854ee7ec10b4d8dc7716d7a7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_Arcanum-12b/2d0a414f-1cf2-4ae3-951b-ed69d1ef883f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Xclbr7_Arcanum-12b/1762652579.9514", - "retrieved_timestamp": "1762652579.951401", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Xclbr7/Arcanum-12b", - "developer": "Xclbr7", - "inference_platform": "unknown", - "id": "Xclbr7/Arcanum-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2906864896253053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5265359354118465 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41703124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3586269946808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_Hyena-12b/06eb233f-5182-4b9e-be3f-21c928eef397.json b/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_Hyena-12b/06eb233f-5182-4b9e-be3f-21c928eef397.json deleted file mode 100644 index 1a8023a2c07cccf18a43f5c129af40c6c3502f9f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_Hyena-12b/06eb233f-5182-4b9e-be3f-21c928eef397.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Xclbr7_Hyena-12b/1762652579.9516642", - "retrieved_timestamp": "1762652579.951665", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Xclbr7/Hyena-12b", - "developer": "Xclbr7", - "inference_platform": "unknown", - "id": "Xclbr7/Hyena-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3404455733010634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5457182415468321 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39842708333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3439162234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_caliburn-12b/e897d1fc-2c71-4c61-971b-eeddfae1b75c.json 
b/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_caliburn-12b/e897d1fc-2c71-4c61-971b-eeddfae1b75c.json deleted file mode 100644 index f3932473329cfb72a067d4a3f021c88b892463f0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_caliburn-12b/e897d1fc-2c71-4c61-971b-eeddfae1b75c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Xclbr7_caliburn-12b/1762652579.951879", - "retrieved_timestamp": "1762652579.95188", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Xclbr7/caliburn-12b", - "developer": "Xclbr7", - "inference_platform": "unknown", - "id": "Xclbr7/caliburn-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35763108551975425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5518630300231809 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4291875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36751994680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_caliburn-v2-12b/18a12670-8785-44ef-a365-78ce797b8ba5.json b/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_caliburn-v2-12b/18a12670-8785-44ef-a365-78ce797b8ba5.json deleted file mode 100644 index 600bec3b75539c693e86fdcb0f23594b4ccf1a42..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_caliburn-v2-12b/18a12670-8785-44ef-a365-78ce797b8ba5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Xclbr7_caliburn-v2-12b/1762652579.952102", - "retrieved_timestamp": "1762652579.952102", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": 
"HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Xclbr7/caliburn-v2-12b", - "developer": "Xclbr7", - "inference_platform": "unknown", - "id": "Xclbr7/caliburn-v2-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2966816934622358 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141426125097639 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43703125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37840757978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Xiaojian9992024/Xiaojian9992024_Reflection-L3.2-JametMiniMix-3B/e582afbb-99f3-4b43-8ee7-b786680124a9.json b/leaderboard_data/HFOpenLLMv2/Xiaojian9992024/Xiaojian9992024_Reflection-L3.2-JametMiniMix-3B/e582afbb-99f3-4b43-8ee7-b786680124a9.json deleted file mode 100644 index 54d8d917d6c5d2eb70023d9a6d751c42e9b07ae7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Xiaojian9992024/Xiaojian9992024_Reflection-L3.2-JametMiniMix-3B/e582afbb-99f3-4b43-8ee7-b786680124a9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Reflection-L3.2-JametMiniMix-3B/1762652579.9550028", - "retrieved_timestamp": "1762652579.9550028", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B", - "developer": "Xiaojian9992024", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46194541594081484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4389528940684813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36673958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29878656914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Yash21/Yash21_TinyYi-7B-Test/d6a9abee-29ee-44e0-802c-c3e4354ebbac.json b/leaderboard_data/HFOpenLLMv2/Yash21/Yash21_TinyYi-7B-Test/d6a9abee-29ee-44e0-802c-c3e4354ebbac.json deleted file mode 100644 index 5dc104502409511e1a670653f75c2315ff84b1d9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Yash21/Yash21_TinyYi-7B-Test/d6a9abee-29ee-44e0-802c-c3e4354ebbac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Yash21_TinyYi-7B-Test/1762652579.960211", - "retrieved_timestamp": "1762652579.960212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Yash21/TinyYi-7B-Test", - "developer": "Yash21", - "inference_platform": "unknown", - "id": "Yash21/TinyYi-7B-Test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18564852369490728 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29098007801214715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3364479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10912566489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_1PARAMMYL-8B-ModelStock/87231cbd-d911-434d-991b-1eb373cdde4f.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_1PARAMMYL-8B-ModelStock/87231cbd-d911-434d-991b-1eb373cdde4f.json deleted file mode 100644 index 8a862065f8f58aa7f711a519bccba1af27cdbdba..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_1PARAMMYL-8B-ModelStock/87231cbd-d911-434d-991b-1eb373cdde4f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_1PARAMMYL-8B-ModelStock/1762652579.9604638", - "retrieved_timestamp": "1762652579.960465", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/1PARAMMYL-8B-ModelStock", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/1PARAMMYL-8B-ModelStock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5371336941537344 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5215839663555125 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1487915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4409375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4000166223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_2PRYMMAL-Yi1.5-6B-SLERP/e80773ef-5ca2-43de-ba99-a7a997aab7f0.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_2PRYMMAL-Yi1.5-6B-SLERP/e80773ef-5ca2-43de-ba99-a7a997aab7f0.json deleted file mode 100644 index 815b64a7071c38268a8e9cb1c1e3d81780240426..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_2PRYMMAL-Yi1.5-6B-SLERP/e80773ef-5ca2-43de-ba99-a7a997aab7f0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_2PRYMMAL-Yi1.5-6B-SLERP/1762652579.9607239", - "retrieved_timestamp": "1762652579.960725", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/2PRYMMAL-Yi1.5-6B-SLERP", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/2PRYMMAL-Yi1.5-6B-SLERP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28259351853083153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46647504291710673 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47560416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3169880319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-MIRAGE-1-12B/f3f55015-88c7-41ae-b588-9a1eedd56fc2.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-MIRAGE-1-12B/f3f55015-88c7-41ae-b588-9a1eedd56fc2.json deleted file mode 
100644 index 613e2b356a2c09a7c5e1193c5daca5bf1dae5282..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-MIRAGE-1-12B/f3f55015-88c7-41ae-b588-9a1eedd56fc2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-MIRAGE-1-12B/1762652579.96142", - "retrieved_timestamp": "1762652579.96142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/ECE-MIRAGE-1-12B", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-MIRAGE-1-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20698081091503875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30107140221306034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3219375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11095412234042554 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 15.21 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-MIRAGE-1-15B/f904e587-76ac-4583-9235-fcdd20d9a626.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-MIRAGE-1-15B/f904e587-76ac-4583-9235-fcdd20d9a626.json deleted file mode 100644 index 90178b7c642f741f56e29023e46474cd85c45bb3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-MIRAGE-1-15B/f904e587-76ac-4583-9235-fcdd20d9a626.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-MIRAGE-1-15B/1762652579.961622", - "retrieved_timestamp": "1762652579.961622", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/ECE-MIRAGE-1-15B", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-MIRAGE-1-15B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20698081091503875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30107140221306034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3219375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11095412234042554 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 15.21 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V3-MUSR/de30a84d-c8cc-4f3c-9eb4-3f58754dc46b.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V3-MUSR/de30a84d-c8cc-4f3c-9eb4-3f58754dc46b.json deleted file mode 100644 index 2d73d146d01d7d396088233fe7ce080e73c5362f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V3-MUSR/de30a84d-c8cc-4f3c-9eb4-3f58754dc46b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-FT-V3-MUSR/1762652579.962029", - "retrieved_timestamp": "1762652579.962029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15334977858748122 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3041148294962408 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1644780585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V3/45c46c5d-cf81-42d4-bf9e-61aca49b2959.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V3/45c46c5d-cf81-42d4-bf9e-61aca49b2959.json deleted file mode 100644 index cc39e6fb51b96cd5ef4a4206752073ba314cc2d2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V3/45c46c5d-cf81-42d4-bf9e-61aca49b2959.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-FT-V3/1762652579.9618208", - "retrieved_timestamp": "1762652579.9618208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-0.5B-FT-V3", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16419101317836673 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30931341134548046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3644479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11610704787234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V4-MUSR/68382b86-8a68-428e-8338-144a76b8c293.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V4-MUSR/68382b86-8a68-428e-8338-144a76b8c293.json deleted file mode 100644 index 648059681d5e04792e7bdfc747d8a664a9d9bf09..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V4-MUSR/68382b86-8a68-428e-8338-144a76b8c293.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-FT-V4-MUSR/1762652579.9622452", - "retrieved_timestamp": "1762652579.962246", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1137570535069172 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3038362724383693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.13214760638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-SLERP-V2/c0fe65df-7e51-48ad-bf40-fd163804cad1.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-SLERP-V2/c0fe65df-7e51-48ad-bf40-fd163804cad1.json deleted file mode 100644 index b59f85dba6b0f29c33b9bbab4df0fe1438802300..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-SLERP-V2/c0fe65df-7e51-48ad-bf40-fd163804cad1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-SLERP-V2/1762652579.962454", - "retrieved_timestamp": "1762652579.962455", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V2", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1611934112599015 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2934774313772131 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10945811170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-SLERP-V3/d67c4d9a-d5cc-4b26-a439-44c87a299ee8.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-SLERP-V3/d67c4d9a-d5cc-4b26-a439-44c87a299ee8.json deleted file mode 100644 index 9a2f9bc044145e43139df27ee3994713211c309e..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-SLERP-V3/d67c4d9a-d5cc-4b26-a439-44c87a299ee8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-SLERP-V3/1762652579.9626722", - "retrieved_timestamp": "1762652579.9626722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V3", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16701352411601217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29383772587210827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.354125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10871010638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V1/70577ab1-a0ef-41f3-8d6a-00b0b873ee39.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V1/70577ab1-a0ef-41f3-8d6a-00b0b873ee39.json deleted file mode 100644 index 08dadfd6cd7f2b8fdee565f1bc87208f48ba1d59..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V1/70577ab1-a0ef-41f3-8d6a-00b0b873ee39.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V1/1762652579.962892", - "retrieved_timestamp": "1762652579.962893", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32510848991786234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4208506248736219 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4265833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2935505319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V2/6021f954-951a-47e1-980d-ce729f9f39b4.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V2/6021f954-951a-47e1-980d-ce729f9f39b4.json deleted file mode 100644 index 0ead19a739c8eb42a7e71d0ad8c523491105fbe5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V2/6021f954-951a-47e1-980d-ce729f9f39b4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V2/1762652579.963118", - "retrieved_timestamp": "1762652579.963118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32510848991786234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4208506248736219 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4265833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2935505319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-7B-SLERP-V4/e027a39b-1213-42aa-b66f-b1853c644532.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-7B-SLERP-V4/e027a39b-1213-42aa-b66f-b1853c644532.json deleted file mode 100644 index b2eb1604e3f7960bd7f9467121d1cabceabee827..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-7B-SLERP-V4/e027a39b-1213-42aa-b66f-b1853c644532.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-YL-7B-SLERP-V4/1762652579.963329", - "retrieved_timestamp": "1762652579.963329", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2509696494190969 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37697272812325017 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3744895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2131815159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL0.5-FT/4264c0fc-9f40-4c27-b877-63a751678a1c.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL0.5-FT/4264c0fc-9f40-4c27-b877-63a751678a1c.json deleted file mode 100644 index 4c460cf0befa2e2dcad168fd19eeb3a47d156564..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL0.5-FT/4264c0fc-9f40-4c27-b877-63a751678a1c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL0.5-FT/1762652579.963541", - "retrieved_timestamp": "1762652579.963541", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL0.5-FT", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL0.5-FT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18507338306803725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31320911187036277 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.330125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14768949468085107 - } - 
} - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL0.5B-Youri/46564b0a-1489-4c98-9e7b-20daf58c2f87.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL0.5B-Youri/46564b0a-1489-4c98-9e7b-20daf58c2f87.json deleted file mode 100644 index f12f1ead10a847a7d002ee7fefadc3e10c8bc25d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL0.5B-Youri/46564b0a-1489-4c98-9e7b-20daf58c2f87.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL0.5B-Youri/1762652579.963748", - "retrieved_timestamp": "1762652579.9637492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL0.5B-Youri", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL0.5B-Youri" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1446317991817267 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28173574256265815 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36965625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10954122340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL1B-FT-V1/c3a0b587-b379-4013-a5ce-26fdc9dcc44d.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL1B-FT-V1/c3a0b587-b379-4013-a5ce-26fdc9dcc44d.json deleted file mode 100644 index 60d65c97df9b090daa0bc9bb49ee6e664fb3b8d6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL1B-FT-V1/c3a0b587-b379-4013-a5ce-26fdc9dcc44d.json +++ /dev/null @@ -1,107 +0,0 @@ 
-{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL1B-FT-V1/1762652579.963949", - "retrieved_timestamp": "1762652579.9639502", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/ECE-PRYMMAL1B-FT-V1", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE-PRYMMAL1B-FT-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2143745262569981 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4032647427840684 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34165625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2742686170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-Qwen0.5B-FT-V2/ee8952db-9f0a-4892-bff9-4d2ca1b66364.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-Qwen0.5B-FT-V2/ee8952db-9f0a-4892-bff9-4d2ca1b66364.json deleted file mode 100644 index a650a0c2a0e2cb6c5373cfcc23668759f357c567..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-Qwen0.5B-FT-V2/ee8952db-9f0a-4892-bff9-4d2ca1b66364.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_ECE-Qwen0.5B-FT-V2/1762652579.9641678", - "retrieved_timestamp": "1762652579.964169", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/ECE-Qwen0.5B-FT-V2", - "developer": "Youlln", - "inference_platform": "unknown", - "id": 
"Youlln/ECE-Qwen0.5B-FT-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25259311958935626 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.328970813623839 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30628125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16655585106382978 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE.EIFFEIL.ia-0.5B-SLERP/7a5fdffa-146b-43fd-a979-728c37ae599f.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE.EIFFEIL.ia-0.5B-SLERP/7a5fdffa-146b-43fd-a979-728c37ae599f.json deleted file mode 100644 index 8f573342110aefc26dd485c9bd2d87a71067e69a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE.EIFFEIL.ia-0.5B-SLERP/7a5fdffa-146b-43fd-a979-728c37ae599f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_ECE.EIFFEIL.ia-0.5B-SLERP/1762652579.964375", - "retrieved_timestamp": "1762652579.964375", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/ECE.EIFFEIL.ia-0.5B-SLERP", - "developer": "Youlln", - "inference_platform": "unknown", - "id": "Youlln/ECE.EIFFEIL.ia-0.5B-SLERP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2561403742071038 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33056720460862643 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1903257978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Yuma42/Yuma42_KangalKhan-RawRuby-7B/4ad4a260-770a-4cce-9ba7-546cfa4cde58.json b/leaderboard_data/HFOpenLLMv2/Yuma42/Yuma42_KangalKhan-RawRuby-7B/4ad4a260-770a-4cce-9ba7-546cfa4cde58.json deleted file mode 100644 index 19a58f89ee45c1028e47d22164b061a0fe76234b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Yuma42/Yuma42_KangalKhan-RawRuby-7B/4ad4a260-770a-4cce-9ba7-546cfa4cde58.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Yuma42_KangalKhan-RawRuby-7B/1762652579.9648829", - "retrieved_timestamp": "1762652579.964884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Yuma42/KangalKhan-RawRuby-7B", - "developer": "Yuma42", - "inference_platform": "unknown", - "id": "Yuma42/KangalKhan-RawRuby-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.547674614467391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47547278683676025 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39495833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30227726063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/Z1-Coder/Z1-Coder_Z1-Coder-7B/750b35ad-fdf6-4243-91e7-aee90f84fa5b.json b/leaderboard_data/HFOpenLLMv2/Z1-Coder/Z1-Coder_Z1-Coder-7B/750b35ad-fdf6-4243-91e7-aee90f84fa5b.json deleted file mode 100644 index fd0ddf8c0b4261a6f997530cf4128fde6e863895..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/Z1-Coder/Z1-Coder_Z1-Coder-7B/750b35ad-fdf6-4243-91e7-aee90f84fa5b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Z1-Coder_Z1-Coder-7B/1762652579.9655669", - "retrieved_timestamp": "1762652579.965568", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Z1-Coder/Z1-Coder-7B", - "developer": "Z1-Coder", - "inference_platform": "unknown", - "id": "Z1-Coder/Z1-Coder-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3215113676157041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48418251218099567 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37591422872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/ZHLiu627/ZHLiu627_zephyr-7b-gemma-dpo-avg/856a1f50-7ffb-4eb1-be4a-8aaa3cd6ee66.json b/leaderboard_data/HFOpenLLMv2/ZHLiu627/ZHLiu627_zephyr-7b-gemma-dpo-avg/856a1f50-7ffb-4eb1-be4a-8aaa3cd6ee66.json deleted file mode 100644 index 145bb23cdda7834d9b41ed30a06f70416fd17d03..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ZHLiu627/ZHLiu627_zephyr-7b-gemma-dpo-avg/856a1f50-7ffb-4eb1-be4a-8aaa3cd6ee66.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ZHLiu627_zephyr-7b-gemma-dpo-avg/1762652579.9658082", - "retrieved_timestamp": "1762652579.9658089", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ZHLiu627/zephyr-7b-gemma-dpo-avg", - "developer": "ZHLiu627", - "inference_platform": "unknown", - "id": "ZHLiu627/zephyr-7b-gemma-dpo-avg" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30899679517014855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41488227982365095 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4107083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28507313829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ZeroXClem/ZeroXClem_L3-Aspire-Heart-Matrix-8B/e6d8d952-5a3d-4a97-860c-8275b10c6516.json b/leaderboard_data/HFOpenLLMv2/ZeroXClem/ZeroXClem_L3-Aspire-Heart-Matrix-8B/e6d8d952-5a3d-4a97-860c-8275b10c6516.json deleted file mode 100644 index 97f2127f564f58e56033393a7a565e4e56e0837c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ZeroXClem/ZeroXClem_L3-Aspire-Heart-Matrix-8B/e6d8d952-5a3d-4a97-860c-8275b10c6516.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/ZeroXClem_L3-Aspire-Heart-Matrix-8B/1762652579.96632", - "retrieved_timestamp": "1762652579.966321", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ZeroXClem/L3-Aspire-Heart-Matrix-8B", - "developer": "ZeroXClem", - "inference_platform": "unknown", - "id": "ZeroXClem/L3-Aspire-Heart-Matrix-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48335305877294465 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5384211938486898 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3784906914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ZeusLabs/ZeusLabs_L3-Aethora-15B-V2/0e9ed58c-1a3e-49b4-8013-994642a95920.json b/leaderboard_data/HFOpenLLMv2/ZeusLabs/ZeusLabs_L3-Aethora-15B-V2/0e9ed58c-1a3e-49b4-8013-994642a95920.json deleted file mode 100644 index e57d28d679d010d2afa523862bc1e0ce19c55a23..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ZeusLabs/ZeusLabs_L3-Aethora-15B-V2/0e9ed58c-1a3e-49b4-8013-994642a95920.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ZeusLabs_L3-Aethora-15B-V2/1762652579.968798", - "retrieved_timestamp": "1762652579.9687989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ZeusLabs/L3-Aethora-15B-V2", - "developer": "ZeusLabs", - "inference_platform": "unknown", - "id": 
"ZeusLabs/L3-Aethora-15B-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7208063493752133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5010910465463698 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3870833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3499833776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 15.01 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ZhangShenao/ZhangShenao_SELM-Llama-3-8B-Instruct-iter-3/6bf4063b-44aa-4809-a400-5406abe5eb2e.json b/leaderboard_data/HFOpenLLMv2/ZhangShenao/ZhangShenao_SELM-Llama-3-8B-Instruct-iter-3/6bf4063b-44aa-4809-a400-5406abe5eb2e.json deleted file mode 100644 index 16acba86f9c00dea694a78b3c241899e81339eaf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ZhangShenao/ZhangShenao_SELM-Llama-3-8B-Instruct-iter-3/6bf4063b-44aa-4809-a400-5406abe5eb2e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ZhangShenao_SELM-Llama-3-8B-Instruct-iter-3/1762652579.9690418", - "retrieved_timestamp": "1762652579.969043", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3", - "developer": "ZhangShenao", - "inference_platform": "unknown", - "id": "ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6902817856620433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5046089390770511 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38451041666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783244680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Dracarys-72B-Instruct/2f1e6f4e-86e6-47a4-96e6-3bc2b330cd3a.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Dracarys-72B-Instruct/2f1e6f4e-86e6-47a4-96e6-3bc2b330cd3a.json deleted file mode 100644 index eb9544f7138bbb06c5b26a71146a3628ed86a879..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Dracarys-72B-Instruct/2f1e6f4e-86e6-47a4-96e6-3bc2b330cd3a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/abacusai_Dracarys-72B-Instruct/1762652579.969532", - "retrieved_timestamp": "1762652579.969532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abacusai/Dracarys-72B-Instruct", - "developer": "abacusai", - "inference_platform": "unknown", - "id": "abacusai/Dracarys-72B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7855778224001206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6944066392084981 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4558229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5456283244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-34B-v0.1/e0b9044d-1b87-44f7-b59b-88d790f429e5.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-34B-v0.1/e0b9044d-1b87-44f7-b59b-88d790f429e5.json deleted file mode 100644 index d6c386a5287771a967b5bf89aa321357cb0892b8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-34B-v0.1/e0b9044d-1b87-44f7-b59b-88d790f429e5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/abacusai_Smaug-34B-v0.1/1762652579.970392", - "retrieved_timestamp": "1762652579.9703932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abacusai/Smaug-34B-v0.1", - "developer": "abacusai", - "inference_platform": "unknown", - "id": "abacusai/Smaug-34B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5015625207782018 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5357785983493821 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.397875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4542885638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", 
- "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-72B-v0.1/a3b08cd3-6ead-4db0-92ed-212c6b0e45ee.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-72B-v0.1/a3b08cd3-6ead-4db0-92ed-212c6b0e45ee.json deleted file mode 100644 index e64036cc2af5f462846f88fe8ba44f0952276bd6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-72B-v0.1/a3b08cd3-6ead-4db0-92ed-212c6b0e45ee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/abacusai_Smaug-72B-v0.1/1762652579.970887", - "retrieved_timestamp": "1762652579.9708889", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abacusai/Smaug-72B-v0.1", - "developer": "abacusai", - "inference_platform": "unknown", - "id": "abacusai/Smaug-72B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5167001334237601 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5995632330786429 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19108761329305135 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4473229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4623503989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 72.289 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Llama-3-70B-Instruct-32K/962b4977-63f0-4a87-a36e-f3e592b74761.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Llama-3-70B-Instruct-32K/962b4977-63f0-4a87-a36e-f3e592b74761.json deleted file mode 100644 index b0f2994fbc4d959f2e669a8f0092e4f3519b8b90..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Llama-3-70B-Instruct-32K/962b4977-63f0-4a87-a36e-f3e592b74761.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/abacusai_Smaug-Llama-3-70B-Instruct-32K/1762652579.971162", - "retrieved_timestamp": "1762652579.9711628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abacusai/Smaug-Llama-3-70B-Instruct-32K", - "developer": "abacusai", - "inference_platform": "unknown", - "id": "abacusai/Smaug-Llama-3-70B-Instruct-32K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7761107195574409 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6493108088828602 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27492447129909364 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4207916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47647938829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Mixtral-v0.1/ba0fe822-7a57-4ccb-a97e-e852a59d9ae1.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Mixtral-v0.1/ba0fe822-7a57-4ccb-a97e-e852a59d9ae1.json deleted file mode 100644 index 9ab3a95b6601887f31659ab5f36405bc7e75ea0a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Mixtral-v0.1/ba0fe822-7a57-4ccb-a97e-e852a59d9ae1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/abacusai_Smaug-Mixtral-v0.1/1762652579.971408", - "retrieved_timestamp": "1762652579.9714088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abacusai/Smaug-Mixtral-v0.1", - "developer": "abacusai", - "inference_platform": "unknown", - 
"id": "abacusai/Smaug-Mixtral-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5554428915278129 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5162245602454115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4298125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351894946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Qwen2-72B-Instruct/84695a6b-dc11-448c-bbeb-b3cc05cde7ba.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Qwen2-72B-Instruct/84695a6b-dc11-448c-bbeb-b3cc05cde7ba.json deleted file mode 100644 index d158e05e84632c7ed96344f9a17c4ca537bc8ecc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Qwen2-72B-Instruct/84695a6b-dc11-448c-bbeb-b3cc05cde7ba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/abacusai_Smaug-Qwen2-72B-Instruct/1762652579.9716392", - "retrieved_timestamp": "1762652579.97164", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abacusai/Smaug-Qwen2-72B-Instruct", - "developer": "abacusai", - "inference_platform": "unknown", - "id": "abacusai/Smaug-Qwen2-72B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7825303527972447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6909789934583822 - } - }, - 
{ - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4131419939577039 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615771812080537 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44007291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519032579787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_bigstral-12b-32k/aed1ac03-5364-477e-ab8f-68b599170128.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_bigstral-12b-32k/aed1ac03-5364-477e-ab8f-68b599170128.json deleted file mode 100644 index c719c1240d953e9a23c70874b944843f3150539d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_bigstral-12b-32k/aed1ac03-5364-477e-ab8f-68b599170128.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/abacusai_bigstral-12b-32k/1762652579.971883", - "retrieved_timestamp": "1762652579.971884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abacusai/bigstral-12b-32k", - "developer": "abacusai", - "inference_platform": "unknown", - "id": "abacusai/bigstral-12b-32k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41938057686937324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4700122314782882 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy 
on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45597916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26412898936170215 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.476 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_bigyi-15b/19b4d65c-39c7-4b81-bb71-f166ab4f9490.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_bigyi-15b/19b4d65c-39c7-4b81-bb71-f166ab4f9490.json deleted file mode 100644 index 9469eaf24a2618e32d5a8d40fbc10e72c07f170c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_bigyi-15b/19b4d65c-39c7-4b81-bb71-f166ab4f9490.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/abacusai_bigyi-15b/1762652579.972117", - "retrieved_timestamp": "1762652579.972117", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abacusai/bigyi-15b", - "developer": "abacusai", - "inference_platform": "unknown", - "id": "abacusai/bigyi-15b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20940327220663396 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4345298820215116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30028257978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 15.058 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-0tmgq-5tpbg/b5707c22-a2a2-4787-a902-b72945ebccd9.json b/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-0tmgq-5tpbg/b5707c22-a2a2-4787-a902-b72945ebccd9.json deleted file mode 100644 index 1e269ffdb909bee611e1ac70b2931a4cf5ceab52..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-0tmgq-5tpbg/b5707c22-a2a2-4787-a902-b72945ebccd9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/abhishek_autotrain-0tmgq-5tpbg/1762652579.972783", - "retrieved_timestamp": "1762652579.972784", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abhishek/autotrain-0tmgq-5tpbg", - "developer": "abhishek", - "inference_platform": "unknown", - "id": "abhishek/autotrain-0tmgq-5tpbg" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19516549422199764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3127326480314375 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35837499999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11436170212765957 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-0tmgq-5tpbg/ddd32642-ed7a-41b8-974a-f85b7f04d0db.json b/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-0tmgq-5tpbg/ddd32642-ed7a-41b8-974a-f85b7f04d0db.json deleted file mode 100644 index 5b9d4bd846596750a6059827fc595492aa194983..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-0tmgq-5tpbg/ddd32642-ed7a-41b8-974a-f85b7f04d0db.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/abhishek_autotrain-0tmgq-5tpbg/1762652579.972393", - "retrieved_timestamp": "1762652579.972395", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abhishek/autotrain-0tmgq-5tpbg", - "developer": "abhishek", - "inference_platform": "unknown", - "id": "abhishek/autotrain-0tmgq-5tpbg" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19571514692127998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3134513987945074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36504166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11510970744680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-vr4a1-e5mms/e1462a5a-d120-4c0f-ba13-fbecb18619a0.json b/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-vr4a1-e5mms/e1462a5a-d120-4c0f-ba13-fbecb18619a0.json deleted file mode 100644 index 99960a026e163d243184977ec0e809ee6a8d69d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-vr4a1-e5mms/e1462a5a-d120-4c0f-ba13-fbecb18619a0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/abhishek_autotrain-vr4a1-e5mms/1762652579.973708", - "retrieved_timestamp": "1762652579.973709", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abhishek/autotrain-vr4a1-e5mms", - "developer": "abhishek", - "inference_platform": "unknown", - "id": 
"abhishek/autotrain-vr4a1-e5mms" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21422492320376602 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5000624442873264 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.389125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36668882978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 16.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/adamo1139/adamo1139_Yi-34B-200K-AEZAKMI-v2/a28de361-e90d-44f7-b609-e4d64ae1be6f.json b/leaderboard_data/HFOpenLLMv2/adamo1139/adamo1139_Yi-34B-200K-AEZAKMI-v2/a28de361-e90d-44f7-b609-e4d64ae1be6f.json deleted file mode 100644 index 5a30331a4cb961d53ba27b12c7ca5816677d25a9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/adamo1139/adamo1139_Yi-34B-200K-AEZAKMI-v2/a28de361-e90d-44f7-b609-e4d64ae1be6f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/adamo1139_Yi-34B-200K-AEZAKMI-v2/1762652579.974368", - "retrieved_timestamp": "1762652579.974369", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "adamo1139/Yi-34B-200K-AEZAKMI-v2", - "developer": "adamo1139", - "inference_platform": "unknown", - "id": "adamo1139/Yi-34B-200K-AEZAKMI-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4555257827010111 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383819237015192 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38860416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4512965425531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/aevalone/aevalone_distill_qw_test/108ead60-3cee-43e7-925a-619bace5b65f.json b/leaderboard_data/HFOpenLLMv2/aevalone/aevalone_distill_qw_test/108ead60-3cee-43e7-925a-619bace5b65f.json deleted file mode 100644 index 5c8f47cfe0750cb4b47ea1ceec635719458a792a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/aevalone/aevalone_distill_qw_test/108ead60-3cee-43e7-925a-619bace5b65f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/aevalone_distill_qw_test/1762652579.975426", - "retrieved_timestamp": "1762652579.9754272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "aevalone/distill_qw_test", - "developer": "aevalone", - "inference_platform": "unknown", - "id": "aevalone/distill_qw_test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.740889728143548 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5245748734435777 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4780966767371601 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38596874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4091589095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Llama-3.2-1B-Instruct-CrashCourse12K/fbedd898-b839-49c1-bd6d-3a8744d4138a.json b/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Llama-3.2-1B-Instruct-CrashCourse12K/fbedd898-b839-49c1-bd6d-3a8744d4138a.json deleted file mode 100644 index 7a1a41bedb5a4841f57762820b229fc19383e0c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Llama-3.2-1B-Instruct-CrashCourse12K/fbedd898-b839-49c1-bd6d-3a8744d4138a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/agentlans_Llama-3.2-1B-Instruct-CrashCourse12K/1762652579.976028", - "retrieved_timestamp": "1762652579.976029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "agentlans/Llama-3.2-1B-Instruct-CrashCourse12K", - "developer": "agentlans", - "inference_platform": "unknown", - "id": "agentlans/Llama-3.2-1B-Instruct-CrashCourse12K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5395062877609188 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35481032861183426 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32104166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1809341755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Llama3.1-Daredevilish-Instruct/7a6d7a66-5772-4793-9597-ef0225b63f30.json b/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Llama3.1-Daredevilish-Instruct/7a6d7a66-5772-4793-9597-ef0225b63f30.json deleted file mode 100644 index 0026724efc0ddb6ca0bb89d5dba41e1c4b63d486..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Llama3.1-Daredevilish-Instruct/7a6d7a66-5772-4793-9597-ef0225b63f30.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-Daredevilish-Instruct/1762652579.9768262", - "retrieved_timestamp": "1762652579.976827", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "agentlans/Llama3.1-Daredevilish-Instruct", - "developer": "agentlans", - "inference_platform": "unknown", - "id": "agentlans/Llama3.1-Daredevilish-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7925969760236173 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5235442557198345 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17220543806646527 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3877160904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Qwen2.5-0.5B-Instruct-CrashCourse-dropout/ad130d6f-6a5e-447a-a1ee-bfa2d93e5336.json b/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Qwen2.5-0.5B-Instruct-CrashCourse-dropout/ad130d6f-6a5e-447a-a1ee-bfa2d93e5336.json deleted file mode 100644 index f394063013e4754b57ea25517b42aaf23a1f843a..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Qwen2.5-0.5B-Instruct-CrashCourse-dropout/ad130d6f-6a5e-447a-a1ee-bfa2d93e5336.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/agentlans_Qwen2.5-0.5B-Instruct-CrashCourse-dropout/1762652579.9778361", - "retrieved_timestamp": "1762652579.977837", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout", - "developer": "agentlans", - "inference_platform": "unknown", - "id": "agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2948831323111566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3311726760218689 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16082114361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ai21labs/ai21labs_Jamba-v0.1/e9546f28-0f6b-449e-a2b3-c6ab262103cc.json b/leaderboard_data/HFOpenLLMv2/ai21labs/ai21labs_Jamba-v0.1/e9546f28-0f6b-449e-a2b3-c6ab262103cc.json deleted file mode 100644 index 489d7fd080b4b7bc71db71906b1a475f818fac20..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ai21labs/ai21labs_Jamba-v0.1/e9546f28-0f6b-449e-a2b3-c6ab262103cc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ai21labs_Jamba-v0.1/1762652579.978585", - "retrieved_timestamp": "1762652579.978585", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ai21labs/Jamba-v0.1", - "developer": "ai21labs", - "inference_platform": "unknown", - "id": "ai21labs/Jamba-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20255920956395698 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36022602451645724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35902083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916888297872342 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "JambaForCausalLM", - "params_billions": 51.57 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ai4bharat/ai4bharat_Airavata/350b0559-6331-4b8b-82e2-0463baea9d8a.json b/leaderboard_data/HFOpenLLMv2/ai4bharat/ai4bharat_Airavata/350b0559-6331-4b8b-82e2-0463baea9d8a.json deleted file mode 100644 index 35ff3e5b00d09f03861477b77e867bd1e274e28f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ai4bharat/ai4bharat_Airavata/350b0559-6331-4b8b-82e2-0463baea9d8a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ai4bharat_Airavata/1762652579.978861", - "retrieved_timestamp": "1762652579.978862", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ai4bharat/Airavata", - "developer": "ai4bharat", - "inference_platform": "unknown", - "id": "ai4bharat/Airavata" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05585402288150995 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36276862514633795 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3762916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1634807180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.87 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Aether-12b/831b6f81-1552-4a7b-acac-eb927001e440.json b/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Aether-12b/831b6f81-1552-4a7b-acac-eb927001e440.json deleted file mode 100644 index ff491bec3c35b6995328f768aac5b5e96c863071..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Aether-12b/831b6f81-1552-4a7b-acac-eb927001e440.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/aixonlab_Aether-12b/1762652579.979132", - "retrieved_timestamp": "1762652579.979133", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "aixonlab/Aether-12b", - "developer": "aixonlab", - "inference_platform": "unknown", - "id": "aixonlab/Aether-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23468286369056326 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5179400750435481 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - 
}, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38286458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3410073138297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Grey-12b/2c4626c7-3016-4641-9862-0ba4f7f7936c.json b/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Grey-12b/2c4626c7-3016-4641-9862-0ba4f7f7936c.json deleted file mode 100644 index 9ea3b7a6c92848457c603578b5a8986f01addccb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Grey-12b/2c4626c7-3016-4641-9862-0ba4f7f7936c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/aixonlab_Grey-12b/1762652579.979384", - "retrieved_timestamp": "1762652579.9793851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "aixonlab/Grey-12b", - "developer": "aixonlab", - "inference_platform": "unknown", - "id": "aixonlab/Grey-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39679938119744496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5698957505959833 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4516354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3779089095744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline 
at end of file diff --git a/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Zara-14b-v1.2/a4c3ddcb-482c-47fb-9290-3c0678b38fb4.json b/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Zara-14b-v1.2/a4c3ddcb-482c-47fb-9290-3c0678b38fb4.json deleted file mode 100644 index bff3ca2b03bfb20eb1b0253e54b530110a3fec17..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Zara-14b-v1.2/a4c3ddcb-482c-47fb-9290-3c0678b38fb4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/aixonlab_Zara-14b-v1.2/1762652579.979647", - "retrieved_timestamp": "1762652579.979647", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "aixonlab/Zara-14b-v1.2", - "developer": "aixonlab", - "inference_platform": "unknown", - "id": "aixonlab/Zara-14b-v1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6197400674654362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6405368457456163 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46747916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5263464095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/1-800-LLMs_Qwen-2.5-14B-Hindi/21ba6052-9614-454e-999d-ef4f0f693c6c.json b/leaderboard_data/HFOpenLLMv2/alibaba/1-800-LLMs_Qwen-2.5-14B-Hindi/21ba6052-9614-454e-999d-ef4f0f693c6c.json deleted file mode 100644 index c2dc37ec641b072aea799f861eead8b0011f591e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/1-800-LLMs_Qwen-2.5-14B-Hindi/21ba6052-9614-454e-999d-ef4f0f693c6c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/1-800-LLMs_Qwen-2.5-14B-Hindi/1762652579.467683", - 
"retrieved_timestamp": "1762652579.4676852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "1-800-LLMs/Qwen-2.5-14B-Hindi", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "1-800-LLMs/Qwen-2.5-14B-Hindi" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.582570911847232 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6523901531956199 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3330815709969788 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4489375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5262632978723404 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/1024m_QWEN-14B-B100/745bd077-3a0f-4c06-8d19-d7c160512446.json b/leaderboard_data/HFOpenLLMv2/alibaba/1024m_QWEN-14B-B100/745bd077-3a0f-4c06-8d19-d7c160512446.json deleted file mode 100644 index 4b46bbafc4cd8c44c3bc32b7708cea48f814f7ec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/1024m_QWEN-14B-B100/745bd077-3a0f-4c06-8d19-d7c160512446.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/1024m_QWEN-14B-B100/1762652579.468843", - "retrieved_timestamp": "1762652579.4688451", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "1024m/QWEN-14B-B100", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "1024m/QWEN-14B-B100" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7762104549262623 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.653271132679638 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5438066465256798 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5178690159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Aashraf995_Qwen-Evo-7B/705ae322-fed9-4a98-a79e-e0b289065ba9.json b/leaderboard_data/HFOpenLLMv2/alibaba/Aashraf995_Qwen-Evo-7B/705ae322-fed9-4a98-a79e-e0b289065ba9.json deleted file mode 100644 index 51d57e54d9ec3b303586c94b35d76b9cb916274a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Aashraf995_Qwen-Evo-7B/705ae322-fed9-4a98-a79e-e0b289065ba9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Aashraf995_Qwen-Evo-7B/1762652579.4765608", - "retrieved_timestamp": "1762652579.476562", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Aashraf995/Qwen-Evo-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Aashraf995/Qwen-Evo-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4757343847657549 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5709361538590277 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.31419939577039274 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4541458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44622672872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Aashraf995_QwenStock-14B/7888b813-8ef1-4367-8168-edd1bd3c7888.json b/leaderboard_data/HFOpenLLMv2/alibaba/Aashraf995_QwenStock-14B/7888b813-8ef1-4367-8168-edd1bd3c7888.json deleted file mode 100644 index 953f8a0fc0caec8d400adf6fa4a01dbbf8720be8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Aashraf995_QwenStock-14B/7888b813-8ef1-4367-8168-edd1bd3c7888.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Aashraf995_QwenStock-14B/1762652579.476816", - "retrieved_timestamp": "1762652579.476817", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Aashraf995/QwenStock-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Aashraf995/QwenStock-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5008632650256873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6550130348108012 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4792604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5382313829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Alsebay_Qwen2.5-7B-test-novelist/19ff3120-2171-48b3-8db6-1c76bb57cf47.json b/leaderboard_data/HFOpenLLMv2/alibaba/Alsebay_Qwen2.5-7B-test-novelist/19ff3120-2171-48b3-8db6-1c76bb57cf47.json deleted file mode 100644 index 863c018c78cef0dbba314bca52d08622eee0fe0b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Alsebay_Qwen2.5-7B-test-novelist/19ff3120-2171-48b3-8db6-1c76bb57cf47.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Alsebay_Qwen2.5-7B-test-novelist/1762652579.479883", - "retrieved_timestamp": "1762652579.4798841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Alsebay/Qwen2.5-7B-test-novelist", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Alsebay/Qwen2.5-7B-test-novelist" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5351600420218354 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.515121518446605 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2348942598187311 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47488541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3865525265957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Aryanne_QwentileSwap/ee2c5dd9-09db-45fa-8e67-961993d30672.json b/leaderboard_data/HFOpenLLMv2/alibaba/Aryanne_QwentileSwap/ee2c5dd9-09db-45fa-8e67-961993d30672.json deleted file 
mode 100644 index 30f9c1793be9b6689c2d90ca62bd6a7f865d5642..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Aryanne_QwentileSwap/ee2c5dd9-09db-45fa-8e67-961993d30672.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Aryanne_QwentileSwap/1762652579.4827101", - "retrieved_timestamp": "1762652579.482711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Aryanne/QwentileSwap", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Aryanne/QwentileSwap" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7378422585406721 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7008370136278447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42220543806646527 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4640416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5945811170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1a2d8396-4ff1-4386-a76b-d4863c7736c5.json b/leaderboard_data/HFOpenLLMv2/alibaba/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1a2d8396-4ff1-4386-a76b-d4863c7736c5.json deleted file mode 100644 index ea911d3f094da527dad8bb2ab4e518b2f970145a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1a2d8396-4ff1-4386-a76b-d4863c7736c5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1762652579.483878", - "retrieved_timestamp": "1762652579.4838789", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AtAndDev/Qwen2.5-1.5B-continuous-learnt", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "AtAndDev/Qwen2.5-1.5B-continuous-learnt" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45105431366551857 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42746984992662185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1472809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36228124999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28058510638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/AtAndDev_Qwen2.5-1.5B-continuous-learnt/4f7f368f-0646-4c16-80de-69d9c5e28193.json b/leaderboard_data/HFOpenLLMv2/alibaba/AtAndDev_Qwen2.5-1.5B-continuous-learnt/4f7f368f-0646-4c16-80de-69d9c5e28193.json deleted file mode 100644 index 1203840e510f0aeb1ff16a66cd6e8aa9c993b04c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/AtAndDev_Qwen2.5-1.5B-continuous-learnt/4f7f368f-0646-4c16-80de-69d9c5e28193.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1762652579.483521", - "retrieved_timestamp": "1762652579.483522", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AtAndDev/Qwen2.5-1.5B-continuous-learnt", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "AtAndDev/Qwen2.5-1.5B-continuous-learnt" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4605214165081982 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42577470857933336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3636458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28116688829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/dcd14b21-f2fd-4c10-bf83-b6bb946f2789.json b/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/dcd14b21-f2fd-4c10-bf83-b6bb946f2789.json deleted file mode 100644 index f5b275fdb36732723c11c3c33cb1f1fb58699f6c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/dcd14b21-f2fd-4c10-bf83-b6bb946f2789.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CombinHorizon_Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/1762652579.508495", - "retrieved_timestamp": "1762652579.5084958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8239958864701216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6370093752306357 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4979222074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/3171e54f-4c6f-40cf-ba6c-ef23b803ca33.json b/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/3171e54f-4c6f-40cf-ba6c-ef23b803ca33.json deleted file mode 100644 index aefa35b7cd101d89a60c73b2f667448e0d5443bf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/3171e54f-4c6f-40cf-ba6c-ef23b803ca33.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CombinHorizon_Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/1762652579.508758", - "retrieved_timestamp": "1762652579.508759", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7564019025075688 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5402085849577634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.493202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40330208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4341755319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/62faed28-8f0f-4ff8-894f-b4b5b754b4cf.json b/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/62faed28-8f0f-4ff8-894f-b4b5b754b4cf.json deleted file mode 100644 index b282f2f20a1ba617e25723a18a2e782780f9027c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/62faed28-8f0f-4ff8-894f-b4b5b754b4cf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CombinHorizon_huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/1762652579.509247", - "retrieved_timestamp": "1762652579.509248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8206237228331937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.692924708291253 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5944108761329305 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42072916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5720578457446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/62b4c918-b33b-40cf-888b-42b116a9e04d.json b/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/62b4c918-b33b-40cf-888b-42b116a9e04d.json deleted file mode 100644 index d9b653064f708af24431ef18971d444433110e8f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/62b4c918-b33b-40cf-888b-42b116a9e04d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CombinHorizon_huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/1762652579.509461", - "retrieved_timestamp": "1762652579.509462", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8175762532303177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6335891556421077 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.547583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4910239361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff 
--git a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/3bf71784-e6f1-405b-ad23-e74a91df7051.json b/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/3bf71784-e6f1-405b-ad23-e74a91df7051.json deleted file mode 100644 index ef98e5850da33a438c55eb71f7382efffb4f7eed..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/3bf71784-e6f1-405b-ad23-e74a91df7051.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CombinHorizon_zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/1762652579.509675", - "retrieved_timestamp": "1762652579.509676", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8328136012446974 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6955174427138592 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5853474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43139583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5684840425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme-merge2/2121d736-eec6-4a86-bae0-cd032f9eb603.json b/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme-merge2/2121d736-eec6-4a86-bae0-cd032f9eb603.json deleted file mode 100644 index a5fa40c3c1a2ce84722d5210dfd624d1427a00d9..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme-merge2/2121d736-eec6-4a86-bae0-cd032f9eb603.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CoolSpring_Qwen2-0.5B-Abyme-merge2/1762652579.511093", - "retrieved_timestamp": "1762652579.511094", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CoolSpring/Qwen2-0.5B-Abyme-merge2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CoolSpring/Qwen2-0.5B-Abyme-merge2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2021846478454944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29942723009138733 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3687291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14893617021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme-merge3/2a633e8b-b35a-4a26-83bb-b471bab18ed2.json b/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme-merge3/2a633e8b-b35a-4a26-83bb-b471bab18ed2.json deleted file mode 100644 index d1dfaaaee072fc7e5a29aaac3382c6986c60859d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme-merge3/2a633e8b-b35a-4a26-83bb-b471bab18ed2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CoolSpring_Qwen2-0.5B-Abyme-merge3/1762652579.51142", - "retrieved_timestamp": "1762652579.511421", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CoolSpring/Qwen2-0.5B-Abyme-merge3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CoolSpring/Qwen2-0.5B-Abyme-merge3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23860468002677343 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30031404525933675 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35009375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15001662234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme/46d2afd2-b620-4474-ac6c-4f6bdef93d1c.json b/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme/46d2afd2-b620-4474-ac6c-4f6bdef93d1c.json deleted file mode 100644 index 52bff1e7717a523fc97345e9a9ca7e0e55791c8d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme/46d2afd2-b620-4474-ac6c-4f6bdef93d1c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CoolSpring_Qwen2-0.5B-Abyme/1762652579.5106628", - "retrieved_timestamp": "1762652579.510665", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CoolSpring/Qwen2-0.5B-Abyme", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CoolSpring/Qwen2-0.5B-Abyme" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19151850423542865 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2861834296481826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35421875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13331117021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Broca/4429613e-2db7-4061-931f-eaa70d202b71.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Broca/4429613e-2db7-4061-931f-eaa70d202b71.json deleted file mode 100644 index 11ab7889547abb3d6a7d2bf2b77e5ff1d40a7a82..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Broca/4429613e-2db7-4061-931f-eaa70d202b71.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Broca/1762652579.5150259", - "retrieved_timestamp": "1762652579.5150259", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Broca", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Broca" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.560414145578177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6527145981540362 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3580060422960725 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47665625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364029255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-BrocaV9/782219f0-25f7-465b-9f86-5e48c9d4703e.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-BrocaV9/782219f0-25f7-465b-9f86-5e48c9d4703e.json deleted file mode 100644 index 82a610c8f73f051341931e3992dc6dbefa5b42d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-BrocaV9/782219f0-25f7-465b-9f86-5e48c9d4703e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-BrocaV9/1762652579.515307", - "retrieved_timestamp": "1762652579.5153081", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-BrocaV9", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-BrocaV9" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6762933460994606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6391383585238984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3814199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640939597315436 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46903125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5330784574468085 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav3/7abe4912-4e21-4774-8011-482603f7bcc0.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav3/7abe4912-4e21-4774-8011-482603f7bcc0.json deleted file mode 100644 index e30b9b3168207856815c04e97e4d5a3b5bcfd333..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav3/7abe4912-4e21-4774-8011-482603f7bcc0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Brocav3/1762652579.5155342", - "retrieved_timestamp": "1762652579.515535", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Brocav3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Brocav3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6951776841004091 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6452353476182755 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38746223564954685 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4756354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.531748670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav6/63a1000f-1de8-42ef-a905-70b78bf46417.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav6/63a1000f-1de8-42ef-a905-70b78bf46417.json deleted file mode 100644 index 04c33b1bb956a797981fa725ddb7520dc6ec942a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav6/63a1000f-1de8-42ef-a905-70b78bf46417.json +++ /dev/null 
@@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Brocav6/1762652579.515748", - "retrieved_timestamp": "1762652579.5157492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Brocav6", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Brocav6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6995239298394925 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6388835266626555 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38746223564954685 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47420833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5319148936170213 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav7/6966d397-d336-455a-a156-c2e6430c813f.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav7/6966d397-d336-455a-a156-c2e6430c813f.json deleted file mode 100644 index 01716e63ee6c34fa241c6b87b653eac6fb26f680..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav7/6966d397-d336-455a-a156-c2e6430c813f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Brocav7/1762652579.5159612", - "retrieved_timestamp": "1762652579.5159621", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Brocav7", - "developer": "alibaba", - 
"inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Brocav7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6723715297632504 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6444026981327182 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47960416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5257646276595744 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Emerged/15af5216-fc3d-4102-bbed-eb5b7d0ecf48.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Emerged/15af5216-fc3d-4102-bbed-eb5b7d0ecf48.json deleted file mode 100644 index 9fdbeaac28195a51178f51d735e32fdc4e693d33..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Emerged/15af5216-fc3d-4102-bbed-eb5b7d0ecf48.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Emerged/1762652579.516177", - "retrieved_timestamp": "1762652579.516178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Emerged", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Emerged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7000237148543642 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6260033680703311 - } - }, 
- { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46909375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5186170212765957 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Emergedv3/7b125482-fd80-4f71-b398-9421333ee736.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Emergedv3/7b125482-fd80-4f71-b398-9421333ee736.json deleted file mode 100644 index 113bc690821f647d26091e36cd69a6cd60750efc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Emergedv3/7b125482-fd80-4f71-b398-9421333ee736.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Emergedv3/1762652579.516385", - "retrieved_timestamp": "1762652579.516386", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Emergedv3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Emergedv3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6388493641316153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6190728411056029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36073825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173703457446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-FinalMerge/36ebe0b7-51ae-4ea5-ba42-c9fd0d717259.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-FinalMerge/36ebe0b7-51ae-4ea5-ba42-c9fd0d717259.json deleted file mode 100644 index 57e08c13fb0d62d5383b3cddc685d703e4a2687b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-FinalMerge/36ebe0b7-51ae-4ea5-ba42-c9fd0d717259.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-FinalMerge/1762652579.516642", - "retrieved_timestamp": "1762652579.516643", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-FinalMerge", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-FinalMerge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48909781601705693 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5714945310011449 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3814199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43790625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4574468085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end 
of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyper/8412921a-ad8c-4106-a3a1-9259d2ddb074.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyper/8412921a-ad8c-4106-a3a1-9259d2ddb074.json deleted file mode 100644 index be5b29aafe59125efbbda2c8873bfa845f8bd3a5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyper/8412921a-ad8c-4106-a3a1-9259d2ddb074.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyper/1762652579.516851", - "retrieved_timestamp": "1762652579.516851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Hyper", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Hyper" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5391317260424563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6507453346766106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39177852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48983333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5374002659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-HyperMarck-dl/5b6ef372-86e5-4fc1-85ba-5a76517bb10f.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-HyperMarck-dl/5b6ef372-86e5-4fc1-85ba-5a76517bb10f.json deleted file mode 100644 index 27f04b8e84e0c8b24953436e65e4b1a1f124599d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-HyperMarck-dl/5b6ef372-86e5-4fc1-85ba-5a76517bb10f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-HyperMarck-dl/1762652579.5170581", 
- "retrieved_timestamp": "1762652579.517059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-HyperMarck-dl", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-HyperMarck-dl" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6650276821057017 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6096480033153927 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5090591755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv3/d6700ad3-d858-4420-96b1-d690984ebcaa.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv3/d6700ad3-d858-4420-96b1-d690984ebcaa.json deleted file mode 100644 index bc6ba41ada3a449021e436b1ac914add63ffe954..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv3/d6700ad3-d858-4420-96b1-d690984ebcaa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyperionv3/1762652579.517266", - "retrieved_timestamp": "1762652579.517267", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Hyperionv3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Hyperionv3" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6836371937570092 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6522165609411941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37009063444108764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37080536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47296875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5339926861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv4/7c4a43f8-be43-44d7-a514-f02b70ec367c.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv4/7c4a43f8-be43-44d7-a514-f02b70ec367c.json deleted file mode 100644 index ff7c00a8f6b759095aa991d173a5baf3604f68b1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv4/7c4a43f8-be43-44d7-a514-f02b70ec367c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyperionv4/1762652579.517484", - "retrieved_timestamp": "1762652579.517484", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Hyperionv4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Hyperionv4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5415796752616391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6471791978856551 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3976510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48319791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364029255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv5/5b1e2a5e-cd92-4ad4-b12d-0540461f9f5e.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv5/5b1e2a5e-cd92-4ad4-b12d-0540461f9f5e.json deleted file mode 100644 index 9d9d6d63e8c8cf9b24ac117d236e28a2b5c91b53..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv5/5b1e2a5e-cd92-4ad4-b12d-0540461f9f5e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyperionv5/1762652579.517704", - "retrieved_timestamp": "1762652579.517704", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Hyperionv5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Hyperionv5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6729211824625327 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.644265785086055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3821752265861027 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4795416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5301695478723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-MegaMerge-pt2/f269bb45-d627-49b9-953b-5c8591433aa7.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-MegaMerge-pt2/f269bb45-d627-49b9-953b-5c8591433aa7.json deleted file mode 100644 index 529ec4744b2edacf9f005a090473b93f47810a65..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-MegaMerge-pt2/f269bb45-d627-49b9-953b-5c8591433aa7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-MegaMerge-pt2/1762652579.517905", - "retrieved_timestamp": "1762652579.517906", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-MegaMerge-pt2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-MegaMerge-pt2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.568307645935008 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6577703330510146 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3995468277945619 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.472875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5420545212765957 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-MergeStock/c1db0f86-a3d9-4aa4-9fe3-0442fc63ad25.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-MergeStock/c1db0f86-a3d9-4aa4-9fe3-0442fc63ad25.json deleted file mode 100644 index e9c57e443454838d92033092cb2fc25e11d1a316..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-MergeStock/c1db0f86-a3d9-4aa4-9fe3-0442fc63ad25.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-MergeStock/1762652579.518343", - "retrieved_timestamp": "1762652579.518346", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-MergeStock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-MergeStock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5685326046002386 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6579336391923106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41465256797583083 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4676354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539561170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-ReasoningMerge/df6199fa-3797-4b88-b5fc-e429f513932b.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-ReasoningMerge/df6199fa-3797-4b88-b5fc-e429f513932b.json deleted file mode 100644 index 9d854e9923cc0570a403c1a3ef34d5b8f8d9ada0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-ReasoningMerge/df6199fa-3797-4b88-b5fc-e429f513932b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/CultriX_Qwen2.5-14B-ReasoningMerge/1762652579.518682", - "retrieved_timestamp": "1762652579.518684", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-ReasoningMerge", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-ReasoningMerge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46054690443578594 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6578226399295218 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5165937500000001 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5344913563829787 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Ultimav2/b76ac8f6-7355-4bbf-ad8f-d8fc967120a1.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Ultimav2/b76ac8f6-7355-4bbf-ad8f-d8fc967120a1.json deleted file mode 100644 index b7a0723ea885480e7e572d0bccb471e307efa81a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Ultimav2/b76ac8f6-7355-4bbf-ad8f-d8fc967120a1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Ultimav2/1762652579.519061", - "retrieved_timestamp": "1762652579.5190778", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Ultimav2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"CultriX/Qwen2.5-14B-Ultimav2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5500228283177524 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6555027486976712 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4965625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5417220744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Unity/efd5d269-fc83-43f0-9054-dc3bdf40f180.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Unity/efd5d269-fc83-43f0-9054-dc3bdf40f180.json deleted file mode 100644 index 8c4503dbd6727b8c3c6c9580e18efcf7f2924342..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Unity/efd5d269-fc83-43f0-9054-dc3bdf40f180.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Unity/1762652579.519516", - "retrieved_timestamp": "1762652579.519517", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Unity", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Unity" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6738952645646883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6019955540977778 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": 
{ - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4312688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4679479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.507563164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernicke-SLERP/8359ce66-d904-4092-92be-5e2dbb372677.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernicke-SLERP/8359ce66-d904-4092-92be-5e2dbb372677.json deleted file mode 100644 index a9b8408a1b0ce7b94cd8ca17ff37941f4e02b95d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernicke-SLERP/8359ce66-d904-4092-92be-5e2dbb372677.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernicke-SLERP/1762652579.5203562", - "retrieved_timestamp": "1762652579.5203571", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Wernicke-SLERP", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Wernicke-SLERP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5588904107767391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6440929009604598 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5093916223404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.491 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernicke/6c2287bb-69b0-4b23-ba15-ff4a600e4aa7.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernicke/6c2287bb-69b0-4b23-ba15-ff4a600e4aa7.json deleted file mode 100644 index 9e3cbd3e3c04117efcd809c680c2378378bd3307..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernicke/6c2287bb-69b0-4b23-ba15-ff4a600e4aa7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernicke/1762652579.519787", - "retrieved_timestamp": "1762652579.519788", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Wernicke", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Wernicke" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5234699486252034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6568359662501574 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3814199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46890625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5423869680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernickev3/a4f5037a-381b-4726-b90d-ba559058772c.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernickev3/a4f5037a-381b-4726-b90d-ba559058772c.json deleted file mode 100644 index dc17ed671192c241f17539d4526a5597e23afeb8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernickev3/a4f5037a-381b-4726-b90d-ba559058772c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernickev3/1762652579.520611", - "retrieved_timestamp": "1762652579.520612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-Wernickev3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-Wernickev3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7048198779239085 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6184146992839421 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3542296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4716666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.515126329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-partialmergept1/852ffa19-285b-4037-ac60-63f24cafcecb.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-partialmergept1/852ffa19-285b-4037-ac60-63f24cafcecb.json deleted file mode 100644 index 684385f0a1c8ff2c063f20a1d84f18b9603eb6af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-partialmergept1/852ffa19-285b-4037-ac60-63f24cafcecb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/CultriX_Qwen2.5-14B-partialmergept1/1762652579.5208588", - "retrieved_timestamp": "1762652579.52086", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwen2.5-14B-partialmergept1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/Qwen2.5-14B-partialmergept1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.633728507028019 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6151178406213536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45392749244712993 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615771812080537 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47569791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5207779255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwenfinity-2.5-14B/4fba9290-886e-490d-aaeb-068f8c679006.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwenfinity-2.5-14B/4fba9290-886e-490d-aaeb-068f8c679006.json deleted file mode 100644 index c1626f23694327f36297eb01b481de8b53fe7c9a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwenfinity-2.5-14B/4fba9290-886e-490d-aaeb-068f8c679006.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_Qwenfinity-2.5-14B/1762652579.521086", - "retrieved_timestamp": "1762652579.521087", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/Qwenfinity-2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"CultriX/Qwenfinity-2.5-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4813794066410457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5655007271970033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45058333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4498005319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-EvolMerge/44823eb6-717b-4508-a745-7821545dd3c2.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-EvolMerge/44823eb6-717b-4508-a745-7821545dd3c2.json deleted file mode 100644 index f68077ce16968c35782d06081ae53f5b32e7cba8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-EvolMerge/44823eb6-717b-4508-a745-7821545dd3c2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B-EvolMerge/1762652579.5218382", - "retrieved_timestamp": "1762652579.5218382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/SeQwence-14B-EvolMerge", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/SeQwence-14B-EvolMerge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5381576439403006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6572183434723883 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48208333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418882978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-EvolMergev1/e2621a1f-af39-48fe-a56b-18e9b396a476.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-EvolMergev1/e2621a1f-af39-48fe-a56b-18e9b396a476.json deleted file mode 100644 index 0a738c856254aaad82e5c96223da8dc00c15b64d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-EvolMergev1/e2621a1f-af39-48fe-a56b-18e9b396a476.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B-EvolMergev1/1762652579.5221288", - "retrieved_timestamp": "1762652579.52213", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/SeQwence-14B-EvolMergev1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/SeQwence-14B-EvolMergev1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5554683794554005 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6545547382762975 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46227083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539311835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-v5/6a7ae44e-93f6-4371-b3a6-585a099aa7c7.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-v5/6a7ae44e-93f6-4371-b3a6-585a099aa7c7.json deleted file mode 100644 index 4c6ed82a3cd3f43270c51121b287e7fb304f84af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-v5/6a7ae44e-93f6-4371-b3a6-585a099aa7c7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B-v5/1762652579.522369", - "retrieved_timestamp": "1762652579.522369", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/SeQwence-14B-v5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/SeQwence-14B-v5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5919881470055011 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6517093605796943 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33081570996978854 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47141666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414727393617021 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B/b9f3e9d1-e1f9-44cd-9067-c949adfbe553.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B/b9f3e9d1-e1f9-44cd-9067-c949adfbe553.json deleted file mode 100644 index b4de2b6ae4b58c606f7d1f625e471a78dbea58e2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B/b9f3e9d1-e1f9-44cd-9067-c949adfbe553.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B/1762652579.521544", - "retrieved_timestamp": "1762652579.521545", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/SeQwence-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/SeQwence-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5351600420218354 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6505665291288972 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36073825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46661458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418882978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv1/f4505219-fc0d-4f7b-ad71-3c9fef064c28.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv1/f4505219-fc0d-4f7b-ad71-3c9fef064c28.json deleted file mode 100644 index b6ba23a9eda9baccaa9fcaeb0a6dc9fdaf7d318b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv1/f4505219-fc0d-4f7b-ad71-3c9fef064c28.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14Bv1/1762652579.522592", - "retrieved_timestamp": "1762652579.522593", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/SeQwence-14Bv1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/SeQwence-14Bv1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6678003253589365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6344673727103446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615771812080537 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47042708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.531998005319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv2/49eccc70-6321-451b-87e9-29907cfb53a0.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv2/49eccc70-6321-451b-87e9-29907cfb53a0.json deleted file mode 100644 index a5480649904b02d556880d6e10a57028402d626e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv2/49eccc70-6321-451b-87e9-29907cfb53a0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14Bv2/1762652579.5228019", - "retrieved_timestamp": "1762652579.5228028", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/SeQwence-14Bv2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/SeQwence-14Bv2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5785992278266112 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6304512627108576 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47583081570996977 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36073825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4601041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5334109042553191 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv3/4857c00b-e4fb-417a-8b63-a5b7e9298b40.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv3/4857c00b-e4fb-417a-8b63-a5b7e9298b40.json deleted file mode 100644 index 40dab72e5ee5d6ee553fc59e2bf76273e620b7e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv3/4857c00b-e4fb-417a-8b63-a5b7e9298b40.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14Bv3/1762652579.523057", - "retrieved_timestamp": "1762652579.523058", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CultriX/SeQwence-14Bv3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "CultriX/SeQwence-14Bv3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5719047682371663 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6302253848409948 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.47658610271903323 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4624270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5334940159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Danielbrdz_Barcenas-R1-Qwen-1.5b/c5330fb2-e914-4170-81f8-77a317ba557c.json b/leaderboard_data/HFOpenLLMv2/alibaba/Danielbrdz_Barcenas-R1-Qwen-1.5b/c5330fb2-e914-4170-81f8-77a317ba557c.json deleted file mode 100644 index ff582637b172f869f08fbbda3a70923d9da406c3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Danielbrdz_Barcenas-R1-Qwen-1.5b/c5330fb2-e914-4170-81f8-77a317ba557c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-R1-Qwen-1.5b/1762652579.5346482", - "retrieved_timestamp": "1762652579.5346491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Danielbrdz/Barcenas-R1-Qwen-1.5b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Danielbrdz/Barcenas-R1-Qwen-1.5b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24280132271262472 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35872011187392944 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3496978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.354125 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19090757978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/4b7dd9db-5e94-4885-96f8-189af8d97c09.json b/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/4b7dd9db-5e94-4885-96f8-189af8d97c09.json deleted file mode 100644 index dc0d69f0e043ef213fecf3494e14d6354be0054f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/4b7dd9db-5e94-4885-96f8-189af8d97c09.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/1762652579.53886", - "retrieved_timestamp": "1762652579.53886", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34159474638403875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.580689592371853 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5536253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5155104166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4623503989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 25.506 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/78e7f7ee-3677-499a-aa36-2e8bf0902bf0.json b/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/78e7f7ee-3677-499a-aa36-2e8bf0902bf0.json deleted file mode 100644 index ada50a45c87b2e2ef703b61964733ddf315a3a74..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/78e7f7ee-3677-499a-aa36-2e8bf0902bf0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/1762652579.543009", - "retrieved_timestamp": "1762652579.543009", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17832905579418165 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30326053640004424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3714583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11419547872340426 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 4.089 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/d65793ba-f363-4665-9ff5-1ac08e819d55.json b/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/d65793ba-f363-4665-9ff5-1ac08e819d55.json deleted file mode 100644 index 4d97c25e292922c64855527ac1f6db04a9a68612..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/d65793ba-f363-4665-9ff5-1ac08e819d55.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/1762652579.543224", - "retrieved_timestamp": "1762652579.543225", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28351773294857646 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35922718767499157 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24169184290030213 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38469791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1636469414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 19.022 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/c142222c-836d-493f-a9f8-857426e0573c.json b/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/c142222c-836d-493f-a9f8-857426e0573c.json deleted file mode 100644 index 9aab308d43490656c08480d6c239f93cd0a0176f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/c142222c-836d-493f-a9f8-857426e0573c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/1762652579.543571", - "retrieved_timestamp": "1762652579.543573", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21067766858601844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32861776640637924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3404479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11220079787234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 8.714 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita/6669c8b8-91d6-4f14-8cfb-a6422352850d.json b/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita/6669c8b8-91d6-4f14-8cfb-a6422352850d.json deleted file mode 100644 index 147df673e846b6411acd147f42388119f2398687..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita/6669c8b8-91d6-4f14-8cfb-a6422352850d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita/1762652579.5521228", - "retrieved_timestamp": "1762652579.5521238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepMount00/Qwen2-1.5B-Ita", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DeepMount00/Qwen2-1.5B-Ita" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173495214918638 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39805765159128703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35037500000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2771775265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v2/78ec8596-ee15-4e94-8bc8-77c6bdffc541.json b/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v2/78ec8596-ee15-4e94-8bc8-77c6bdffc541.json deleted file mode 100644 index b91ff2f79ef453be7fda26332f5b7882384e8783..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v2/78ec8596-ee15-4e94-8bc8-77c6bdffc541.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v2/1762652579.552372", - "retrieved_timestamp": "1762652579.552373", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepMount00/Qwen2-1.5B-Ita_v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DeepMount00/Qwen2-1.5B-Ita_v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49998891829235315 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3953827803974795 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37018749999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30319148936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v3/f9cac378-3bdb-4c66-8193-502773c5c5eb.json b/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v3/f9cac378-3bdb-4c66-8193-502773c5c5eb.json deleted file mode 100644 index 0b93ed10a83cec8b544558d5d8e2edeb416cf615..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v3/f9cac378-3bdb-4c66-8193-502773c5c5eb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v3/1762652579.552576", - "retrieved_timestamp": "1762652579.552577", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepMount00/Qwen2-1.5B-Ita_v3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DeepMount00/Qwen2-1.5B-Ita_v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4890479483326463 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3948478837209111 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.37415624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3017785904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v5/04f0529b-474c-42d2-99a8-e3bdd5c18eaf.json b/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v5/04f0529b-474c-42d2-99a8-e3bdd5c18eaf.json deleted file mode 100644 index 3846d0a27748dc6c967053bbd52fb4463f4bbccb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v5/04f0529b-474c-42d2-99a8-e3bdd5c18eaf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v5/1762652579.552789", - "retrieved_timestamp": "1762652579.55279", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepMount00/Qwen2-1.5B-Ita_v5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DeepMount00/Qwen2-1.5B-Ita_v5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4987400098405564 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40320443289745417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34225 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29429853723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v6/041f6e95-b7d1-44c6-a995-0c8257e188aa.json 
b/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v6/041f6e95-b7d1-44c6-a995-0c8257e188aa.json deleted file mode 100644 index d415525f251dd9f7f6dcadbd5d2faab79516b851..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v6/041f6e95-b7d1-44c6-a995-0c8257e188aa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v6/1762652579.553008", - "retrieved_timestamp": "1762652579.5530088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepMount00/Qwen2-1.5B-Ita_v6", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "DeepMount00/Qwen2-1.5B-Ita_v6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29990425404593146 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42486081646897506 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3754583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28715093085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.497 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Dongwei_DeepSeek-R1-Distill-Qwen-7B-GRPO/b36b915f-3c4a-40e8-ab78-8442dbe116e1.json b/leaderboard_data/HFOpenLLMv2/alibaba/Dongwei_DeepSeek-R1-Distill-Qwen-7B-GRPO/b36b915f-3c4a-40e8-ab78-8442dbe116e1.json deleted file mode 100644 index 12c33828d63a82263a25f549025758c3e0f7db68..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Dongwei_DeepSeek-R1-Distill-Qwen-7B-GRPO/b36b915f-3c4a-40e8-ab78-8442dbe116e1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Dongwei_DeepSeek-R1-Distill-Qwen-7B-GRPO/1762652579.5556989", - "retrieved_timestamp": "1762652579.5557", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40376866713653103 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34425676981862185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36628124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23221409574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/EVA-UNIT-01_EVA-Qwen2.5-14B-v0.2/3ba36700-5019-4525-bf5e-6a87cce7ecc5.json b/leaderboard_data/HFOpenLLMv2/alibaba/EVA-UNIT-01_EVA-Qwen2.5-14B-v0.2/3ba36700-5019-4525-bf5e-6a87cce7ecc5.json deleted file mode 100644 index 2ca33d6a71157d9c2db160dc89ae0d9ae37c9652..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/EVA-UNIT-01_EVA-Qwen2.5-14B-v0.2/3ba36700-5019-4525-bf5e-6a87cce7ecc5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EVA-UNIT-01_EVA-Qwen2.5-14B-v0.2/1762652579.5920892", - "retrieved_timestamp": "1762652579.5920892", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4038429145777648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6090237540046592 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3406344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4794479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5135472074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/EVA-UNIT-01_EVA-Qwen2.5-72B-v0.2/9e315ba7-3eea-4934-822e-461e64bf8551.json b/leaderboard_data/HFOpenLLMv2/alibaba/EVA-UNIT-01_EVA-Qwen2.5-72B-v0.2/9e315ba7-3eea-4934-822e-461e64bf8551.json deleted file mode 100644 index 24f5f73cfb135a235de30e5aac54298ab9396623..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/EVA-UNIT-01_EVA-Qwen2.5-72B-v0.2/9e315ba7-3eea-4934-822e-461e64bf8551.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EVA-UNIT-01_EVA-Qwen2.5-72B-v0.2/1762652579.59233", - "retrieved_timestamp": "1762652579.592331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6878837041272712 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7088012228048761 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH 
Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4312688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4085570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47197916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.581283244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Etherll_Qwen2.5-7B-della-test/777b5587-70b2-472f-a6e4-820d653669cd.json b/leaderboard_data/HFOpenLLMv2/alibaba/Etherll_Qwen2.5-7B-della-test/777b5587-70b2-472f-a6e4-820d653669cd.json deleted file mode 100644 index 839c471d162576f57fb6bef5ceab5dd35e64456f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Etherll_Qwen2.5-7B-della-test/777b5587-70b2-472f-a6e4-820d653669cd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Etherll_Qwen2.5-7B-della-test/1762652579.614594", - "retrieved_timestamp": "1762652579.6145952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Etherll/Qwen2.5-7B-della-test", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Etherll/Qwen2.5-7B-della-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7624968417133207 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5447331985391859 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48942598187311176 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.40469791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4360871010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/HPAI-BSC_Qwen2.5-Aloe-Beta-7B/a99dbb21-4f7d-4ac0-b403-2f8bf7aa92b1.json b/leaderboard_data/HFOpenLLMv2/alibaba/HPAI-BSC_Qwen2.5-Aloe-Beta-7B/a99dbb21-4f7d-4ac0-b403-2f8bf7aa92b1.json deleted file mode 100644 index 1b8712285cf63029893c7a77d1b7b39e688056a9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/HPAI-BSC_Qwen2.5-Aloe-Beta-7B/a99dbb21-4f7d-4ac0-b403-2f8bf7aa92b1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HPAI-BSC_Qwen2.5-Aloe-Beta-7B/1762652579.6368651", - "retrieved_timestamp": "1762652579.636866", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HPAI-BSC/Qwen2.5-Aloe-Beta-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "HPAI-BSC/Qwen2.5-Aloe-Beta-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4553506917201914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048995904321122 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3542296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4354222074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_DeepSeek-R1-Qwen-Coder-8B/a0730f18-1058-44b4-b6b6-0881ae2e6338.json b/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_DeepSeek-R1-Qwen-Coder-8B/a0730f18-1058-44b4-b6b6-0881ae2e6338.json deleted file mode 100644 index 0beb66054c47ea1b2b43db80daca926ada38b318..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_DeepSeek-R1-Qwen-Coder-8B/a0730f18-1058-44b4-b6b6-0881ae2e6338.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HeraiHench_DeepSeek-R1-Qwen-Coder-8B/1762652579.6392472", - "retrieved_timestamp": "1762652579.639248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HeraiHench/DeepSeek-R1-Qwen-Coder-8B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "HeraiHench/DeepSeek-R1-Qwen-Coder-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1869472998311148 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29134447696551025 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 8.164 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_Double-Down-Qwen-Math-7B/6e852e78-e666-413e-ac29-ad374bbc74f2.json b/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_Double-Down-Qwen-Math-7B/6e852e78-e666-413e-ac29-ad374bbc74f2.json deleted file mode 100644 index 785cc16c5a55f21577327808a464fd7e9c3d43b2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_Double-Down-Qwen-Math-7B/6e852e78-e666-413e-ac29-ad374bbc74f2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/HeraiHench_Double-Down-Qwen-Math-7B/1762652579.63955", - "retrieved_timestamp": "1762652579.639551", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HeraiHench/Double-Down-Qwen-Math-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "HeraiHench/Double-Down-Qwen-Math-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1669636564316015 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2844613514203868 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37365625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11120345744680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_Marge-Qwen-Math-7B/07f4a9dc-16d7-4b75-922f-09f8e9ebed7d.json b/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_Marge-Qwen-Math-7B/07f4a9dc-16d7-4b75-922f-09f8e9ebed7d.json deleted file mode 100644 index 999966456f9658eec6545a41b30c1fcaf6aac8a2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_Marge-Qwen-Math-7B/07f4a9dc-16d7-4b75-922f-09f8e9ebed7d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HeraiHench_Marge-Qwen-Math-7B/1762652579.6397812", - "retrieved_timestamp": "1762652579.639782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HeraiHench/Marge-Qwen-Math-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"HeraiHench/Marge-Qwen-Math-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12622175826806206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3068846024368302 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39390624999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10555186170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-IRPO-1epoch/0cbb4771-926d-4cf6-a78b-a5f4ac4d5902.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-IRPO-1epoch/0cbb4771-926d-4cf6-a78b-a5f4ac4d5902.json deleted file mode 100644 index e76df15df62873caff85767da35a89658df19a26..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-IRPO-1epoch/0cbb4771-926d-4cf6-a78b-a5f4ac4d5902.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-IRPO-1epoch/1762652579.652392", - "retrieved_timestamp": "1762652579.6523929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen-0.5B-IRPO-1epoch", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen-0.5B-IRPO-1epoch" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25891301746033857 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31638216610052033 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3286354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15001662234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-IRPO-5epoch/301f71c8-fc1f-42e8-9029-f9d03574872b.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-IRPO-5epoch/301f71c8-fc1f-42e8-9029-f9d03574872b.json deleted file mode 100644 index 8d27e3a66a19bd2c14c1ac7e1cacd5badc8fe9bf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-IRPO-5epoch/301f71c8-fc1f-42e8-9029-f9d03574872b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-IRPO-5epoch/1762652579.652645", - "retrieved_timestamp": "1762652579.652645", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen-0.5B-IRPO-5epoch", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen-0.5B-IRPO-5epoch" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24867130325314607 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31891656220326015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32866666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1506815159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-eDPO-1epoch/65e2f2b2-cb5b-40f3-b23a-8c0d185de219.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-eDPO-1epoch/65e2f2b2-cb5b-40f3-b23a-8c0d185de219.json deleted file mode 100644 index 1e4486fdd78393f6c15b2cc545eb1392d1aa358a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-eDPO-1epoch/65e2f2b2-cb5b-40f3-b23a-8c0d185de219.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-eDPO-1epoch/1762652579.652854", - "retrieved_timestamp": "1762652579.6528552", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen-0.5B-eDPO-1epoch", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen-0.5B-eDPO-1epoch" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26233504878167707 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3180637583450692 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33269791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15525265957446807 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline 
at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-eDPO-5epoch/062a1dcd-2553-4657-8f89-a481ff62a193.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-eDPO-5epoch/062a1dcd-2553-4657-8f89-a481ff62a193.json deleted file mode 100644 index df89901718076059c78e657103082cc36e82f043..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-eDPO-5epoch/062a1dcd-2553-4657-8f89-a481ff62a193.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-eDPO-5epoch/1762652579.653099", - "retrieved_timestamp": "1762652579.6531", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen-0.5B-eDPO-5epoch", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen-0.5B-eDPO-5epoch" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24774708883540117 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3096491823869347 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3326354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15226063829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/82b47608-08b5-4368-bead-aa117736c06d.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/82b47608-08b5-4368-bead-aa117736c06d.json deleted file mode 100644 index 08a4688604ac48fa76976776a5894fb868e3305a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/82b47608-08b5-4368-bead-aa117736c06d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/JayHyeon_Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/1762652579.680979", - "retrieved_timestamp": "1762652579.68098", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2573892826589006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3279091360416723 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31685416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16505984042553193 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/747310d0-7c30-4261-b2e8-a783d8753e9a.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/747310d0-7c30-4261-b2e8-a783d8753e9a.json deleted file mode 100644 index d7d16f9d34f0a705607a1a903b49bbde857c9d5c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/747310d0-7c30-4261-b2e8-a783d8753e9a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/1762652579.6812391", - "retrieved_timestamp": "1762652579.68124", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam", - 
"developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3072481017034801 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32638442794247285 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31564583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1624002659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/a7b6a07a-70fc-4d34-9a92-265b848d22d7.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/a7b6a07a-70fc-4d34-9a92-265b848d22d7.json deleted file mode 100644 index 8930e1edd1b8bc87b15eda0d2e606e9ad7afa5f3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/a7b6a07a-70fc-4d34-9a92-265b848d22d7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/1762652579.68145", - "retrieved_timestamp": "1762652579.68145", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25509093649294984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3242353334886223 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31825 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15741356382978725 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/99139c71-a4f2-45d7-95b8-a8b7720681aa.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/99139c71-a4f2-45d7-95b8-a8b7720681aa.json deleted file mode 100644 index 04674614582d4596efac27997800a8aff804f12c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/99139c71-a4f2-45d7-95b8-a8b7720681aa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/1762652579.681671", - "retrieved_timestamp": "1762652579.681671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26358395723347383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3198054258965539 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15857712765957446 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/6407040d-023d-476a-ac79-ef85e104eace.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/6407040d-023d-476a-ac79-ef85e104eace.json deleted file mode 100644 index 07f30a24f719d4dcb0c4535e0444d32f8a25538b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/6407040d-023d-476a-ac79-ef85e104eace.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/1762652579.681885", - "retrieved_timestamp": "1762652579.681886", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23228478215579107 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3254731912466387 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31688541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16115359042553193 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/64f71756-0a54-4a42-a96a-7056071c7dd0.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/64f71756-0a54-4a42-a96a-7056071c7dd0.json deleted file mode 100644 index 2b8a74b303fa653d0d2365af83601420ffeeff95..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/64f71756-0a54-4a42-a96a-7056071c7dd0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/1762652579.682102", - "retrieved_timestamp": "1762652579.682102", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24137732328000816 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3314225693635648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15317486702127658 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/8c18d418-a0a4-435a-b31f-7d879c793b4c.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/8c18d418-a0a4-435a-b31f-7d879c793b4c.json deleted file mode 100644 
index 3c08ebb389205119f2237b2e9f72ecd4295aa513..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/8c18d418-a0a4-435a-b31f-7d879c793b4c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/1762652579.6823108", - "retrieved_timestamp": "1762652579.6823108", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2677805999193252 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3361518077587983 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33815625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15608377659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/75e153a7-d699-4822-90b6-9d7da259e124.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/75e153a7-d699-4822-90b6-9d7da259e124.json deleted file mode 100644 index ea88ce1ed2837e6bc3c1d7e024ef53e842fadf10..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/75e153a7-d699-4822-90b6-9d7da259e124.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/1762652579.682508", - "retrieved_timestamp": "1762652579.682509", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25606501859510544 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3231121828613069 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31955208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1589095744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/836cc2ab-edbc-45fa-af8c-034d0239635b.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/836cc2ab-edbc-45fa-af8c-034d0239635b.json deleted file mode 100644 index a9670a48ea5d8cf31d4c22d992028605073540c9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/836cc2ab-edbc-45fa-af8c-034d0239635b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/1762652579.682722", - "retrieved_timestamp": "1762652579.682723", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy 
on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2639086512675257 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3257435380157632 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32085416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15866023936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/f270e1bd-7e75-4c6c-a701-9def96275025.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/f270e1bd-7e75-4c6c-a701-9def96275025.json deleted file mode 100644 index 6f3b3f5c9b196d6b007c6b4df5fafc5d0dd04cc1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/f270e1bd-7e75-4c6c-a701-9def96275025.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/1762652579.682945", - "retrieved_timestamp": "1762652579.682946", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2517686405404327 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213578303108222 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH 
Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31688541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1584940159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/02ec1b4f-f1e0-4c46-bff2-1475e95cff80.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/02ec1b4f-f1e0-4c46-bff2-1475e95cff80.json deleted file mode 100644 index 26ec77b4e530389e2b0cc1d211514918db28e5ce..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/02ec1b4f-f1e0-4c46-bff2-1475e95cff80.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/1762652579.683157", - "retrieved_timestamp": "1762652579.683158", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24382527249919106 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3266053460297184 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31955208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15541888297872342 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/9da4a976-09a2-4f1c-a15e-d498a2adfdd4.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/9da4a976-09a2-4f1c-a15e-d498a2adfdd4.json deleted file mode 100644 index 34dc60ea47f11e66ba844d3ee709967c5ea22ae3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/9da4a976-09a2-4f1c-a15e-d498a2adfdd4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/1762652579.6833699", - "retrieved_timestamp": "1762652579.683371", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24654804806801509 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32458923603023143 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31821875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15633311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - 
} -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/c3a945da-be07-4132-b558-f20202530b4d.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/c3a945da-be07-4132-b558-f20202530b4d.json deleted file mode 100644 index 1875dd621648fc8469fdb6dfb2dc9d672037bd10..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/c3a945da-be07-4132-b558-f20202530b4d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/1762652579.683736", - "retrieved_timestamp": "1762652579.683738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2505695997730466 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32614538576285174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33818750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15217752659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/723afa16-d986-421c-a6ec-d1b00cb9d765.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/723afa16-d986-421c-a6ec-d1b00cb9d765.json deleted file mode 100644 index 307700f278ad13e0560d78a2bf90319701cea4f5..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/723afa16-d986-421c-a6ec-d1b00cb9d765.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/1762652579.684093", - "retrieved_timestamp": "1762652579.684094", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24567370133468086 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179765517720094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3315208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15658244680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/03e5cd5c-adc0-49d8-9e51-3e315d0bffd6.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/03e5cd5c-adc0-49d8-9e51-3e315d0bffd6.json deleted file mode 100644 index c4ba16fd4d92f0927de4299b61c7c50d2baff3e9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/03e5cd5c-adc0-49d8-9e51-3e315d0bffd6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/1762652579.684393", - "retrieved_timestamp": "1762652579.684394", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM 
v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24539887498503968 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32157618750132033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33818750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1544215425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/6992c085-939e-48b0-8c8f-53d6ca9737de.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/6992c085-939e-48b0-8c8f-53d6ca9737de.json deleted file mode 100644 index 75f084ba17bc0f0dc481c2d10e1e8ebdae5e35e9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/6992c085-939e-48b0-8c8f-53d6ca9737de.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/1762652579.684617", - "retrieved_timestamp": "1762652579.684618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2341830786756916 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3189252460411593 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33015625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15799534574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/59e7ed2b-8385-4c83-b357-6dfa52e429cc.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/59e7ed2b-8385-4c83-b357-6dfa52e429cc.json deleted file mode 100644 index 8f1441b72aa1649b0e84235debf4c3cde465b383..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/59e7ed2b-8385-4c83-b357-6dfa52e429cc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/1762652579.684837", - "retrieved_timestamp": "1762652579.684837", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23196008812173918 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3233548545784329 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33688541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15425531914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/495ed31f-9cbc-4f6f-b4be-2b9ee8f5011c.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/495ed31f-9cbc-4f6f-b4be-2b9ee8f5011c.json deleted file mode 100644 index bb3db6056c0d4d1c325f223c3cf80cff86a40eae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/495ed31f-9cbc-4f6f-b4be-2b9ee8f5011c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/1762652579.6850612", - "retrieved_timestamp": "1762652579.685062", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24175188499847072 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3175499101875348 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15799534574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/6c5809dc-67b3-4567-8d1f-4a8104a11507.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/6c5809dc-67b3-4567-8d1f-4a8104a11507.json deleted file mode 100644 index 1f5f59c0f196a96c13879714bd75535cc392761e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/6c5809dc-67b3-4567-8d1f-4a8104a11507.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/1762652579.6852841", - "retrieved_timestamp": "1762652579.685285", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24932069132124984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196623899087389 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33148958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15708111702127658 - } - } - ], - "additional_details": { - "precision": 
"bfloat16", - "architecture": "Qwen2Model", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/44c78761-2672-49c4-85f4-9b0d575dd914.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/44c78761-2672-49c4-85f4-9b0d575dd914.json deleted file mode 100644 index e3f25fc2be225645f83989aa9a1679091c2f1aa4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/44c78761-2672-49c4-85f4-9b0d575dd914.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/1762652579.685507", - "retrieved_timestamp": "1762652579.685508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2520434668900739 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3197552188491219 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3261875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15508643617021275 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/b33d4765-4633-4c2b-a118-1ed82b0c842b.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/b33d4765-4633-4c2b-a118-1ed82b0c842b.json deleted file mode 100644 index 6e4a9a9784f7f74e5ae7a437a36cb376de546ee9..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/b33d4765-4633-4c2b-a118-1ed82b0c842b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/1762652579.685728", - "retrieved_timestamp": "1762652579.685728", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25803867072700437 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3248229336342538 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15392287234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/8d200434-ef84-403e-9fb6-86c15c4ccfed.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/8d200434-ef84-403e-9fb6-86c15c4ccfed.json deleted file mode 100644 index 4f350407669592585bf42e3af5eed5a11e2d69e7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/8d200434-ef84-403e-9fb6-86c15c4ccfed.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/1762652579.685941", - "retrieved_timestamp": "1762652579.685942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23196008812173918 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.326545450978746 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27097315436241615 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33948958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15367353723404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/3a666f3f-f2ea-4fed-b2fe-750b759eae7a.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/3a666f3f-f2ea-4fed-b2fe-750b759eae7a.json deleted file mode 100644 index 389fd204bf3cafa358e614cc53943d3ec6a1e88e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/3a666f3f-f2ea-4fed-b2fe-750b759eae7a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/1762652579.686151", - "retrieved_timestamp": "1762652579.686152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2487710386219675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3272739110084265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15309175531914893 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/7fbad2de-a9da-4962-ae18-47298811ba5b.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/7fbad2de-a9da-4962-ae18-47298811ba5b.json deleted file mode 100644 index f6c5e30ce45d0f49f649a91261da8d79692a38a9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/7fbad2de-a9da-4962-ae18-47298811ba5b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/1762652579.686357", - "retrieved_timestamp": "1762652579.686357", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25236816092412573 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3129690310926447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32885416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15641622340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1fad00cf-e472-42dc-8b87-a0501cb051ab.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1fad00cf-e472-42dc-8b87-a0501cb051ab.json deleted file mode 100644 index 6050cbf109bb29e66f27cbfe162c53d2dc0003a8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1fad00cf-e472-42dc-8b87-a0501cb051ab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1762652579.686578", - "retrieved_timestamp": "1762652579.686579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2513940788219702 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.322095658026178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33148958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15383976063829788 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/c68fad94-ce6a-4053-b991-2c1e660fe7d9.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/c68fad94-ce6a-4053-b991-2c1e660fe7d9.json deleted file mode 100644 index 45617c92e15f2d4a9e6c7651f2d15d72d7bb9ffb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/c68fad94-ce6a-4053-b991-2c1e660fe7d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/1762652579.686833", - "retrieved_timestamp": "1762652579.6868339", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24567370133468086 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3180087717709833 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15724734042553193 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/a6a3ee79-a93b-4220-ac09-1c5d2f70cdf8.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/a6a3ee79-a93b-4220-ac09-1c5d2f70cdf8.json deleted file mode 100644 index cbf51fc8d146d9a2b09d54ee2b4dae463673e658..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/a6a3ee79-a93b-4220-ac09-1c5d2f70cdf8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/1762652579.6870458", - "retrieved_timestamp": "1762652579.687047", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26363382491788456 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31806866682195567 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3235208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15741356382978725 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/e3471a51-fad2-44cf-bd0c-ad1250d22f83.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/e3471a51-fad2-44cf-bd0c-ad1250d22f83.json deleted file mode 100644 index 41b7c558530830ecf0c22c4401fcce54139dc230..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/e3471a51-fad2-44cf-bd0c-ad1250d22f83.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/1762652579.6873431", - "retrieved_timestamp": "1762652579.687347", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24829674153468353 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3174312444218736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1558344414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/5a3a76e9-f93d-435c-898c-b76bc5dc0cda.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/5a3a76e9-f93d-435c-898c-b76bc5dc0cda.json deleted file mode 100644 index 06bfbe1e5ae5476cf83b7b2c387a933a149b0d7e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/5a3a76e9-f93d-435c-898c-b76bc5dc0cda.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/1762652579.687733", - "retrieved_timestamp": "1762652579.687735", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2517686405404327 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3218020653711833 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32348958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15949135638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/fc83f198-e606-4c3d-aede-cb646b080b3b.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/fc83f198-e606-4c3d-aede-cb646b080b3b.json deleted file mode 100644 index cf6e4913a0c77bf2fd452a24d2ed6feff301d599..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/fc83f198-e606-4c3d-aede-cb646b080b3b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/1762652579.6880698", - "retrieved_timestamp": "1762652579.688079", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25361706937592254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3234331515135053 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32355208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15965757978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/e0452e02-8cf3-4da6-83f6-844f1de6fac2.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/e0452e02-8cf3-4da6-83f6-844f1de6fac2.json deleted file mode 100644 index 9495aa32f15fa46c32082554f6412e7e9aa9c494..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/e0452e02-8cf3-4da6-83f6-844f1de6fac2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/1762652579.688372", - "retrieved_timestamp": "1762652579.688373", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24479935460134664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32395300683134437 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32485416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15866023936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/0792bedd-3891-4622-983b-886c126ace68.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/0792bedd-3891-4622-983b-886c126ace68.json deleted file mode 100644 index baa74748093212de4c7c9bc14c77e7ae7c9c4810..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/0792bedd-3891-4622-983b-886c126ace68.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/1762652579.688585", - "retrieved_timestamp": "1762652579.688586", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25046986440422525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.322699453909483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3209166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1589095744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/31e52020-32b2-4271-89b5-31dfde730404.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/31e52020-32b2-4271-89b5-31dfde730404.json deleted file mode 100644 index 66e9a85bbf868ec5cbc95330d439582a176c98d7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/31e52020-32b2-4271-89b5-31dfde730404.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/1762652579.6888041", - "retrieved_timestamp": "1762652579.688805", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24719743613611883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.325505796038594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32079166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15866023936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline 
at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/06074d49-defe-4303-9899-18f074a06935.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/06074d49-defe-4303-9899-18f074a06935.json deleted file mode 100644 index d6fc40f2651894d1071300aaa34affdb4d37c65b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/06074d49-defe-4303-9899-18f074a06935.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/1762652579.689013", - "retrieved_timestamp": "1762652579.689014", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24165214962964932 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3255889369754366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32745833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1ef0a501-863d-49dc-9bda-5151fb161b41.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1ef0a501-863d-49dc-9bda-5151fb161b41.json deleted file mode 100644 index c1c7bef9282f99c44596177c515d840d48391e64..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1ef0a501-863d-49dc-9bda-5151fb161b41.json +++ /dev/null @@ -1,107 +0,0 
@@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1762652579.689225", - "retrieved_timestamp": "1762652579.689225", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2526928549581776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32354099176995715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32348958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15799534574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/15177605-2eea-4d8a-8462-7b64f7d29071.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/15177605-2eea-4d8a-8462-7b64f7d29071.json deleted file mode 100644 index 0efefab1c44a773236190a78b8d1c4a4fade8dde..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/15177605-2eea-4d8a-8462-7b64f7d29071.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/1762652579.68944", - "retrieved_timestamp": "1762652579.689441", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - 
"model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26685638550158025 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313735254746672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3168229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16339760638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/09996570-4086-46c5-900e-887c3d5d5826.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/09996570-4086-46c5-900e-887c3d5d5826.json deleted file mode 100644 index 05b7348577a2bc332dbdc8b954755a7794a994cb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/09996570-4086-46c5-900e-887c3d5d5826.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/1762652579.689661", - "retrieved_timestamp": "1762652579.689662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.270228549138508 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3299802970903615 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32079166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1634807180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/8a24b990-24f1-46f6-a4f9-4ecaa39b4ec7.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/8a24b990-24f1-46f6-a4f9-4ecaa39b4ec7.json deleted file mode 100644 index 68cc1ff0ebca85a08176cf2971c6383103ed77df..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/8a24b990-24f1-46f6-a4f9-4ecaa39b4ec7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/1762652579.689882", - "retrieved_timestamp": "1762652579.689883", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24802191518504235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33086196042215565 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3208229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16489361702127658 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ac310031-4080-4124-a858-e1293532b222.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ac310031-4080-4124-a858-e1293532b222.json deleted file mode 100644 index 4692c9c690cc1c976538e66a4245c05f47f3d1a8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ac310031-4080-4124-a858-e1293532b222.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/1762652579.690102", - "retrieved_timestamp": "1762652579.690103", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26223531341285566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3281993681712964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.322125 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16339760638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/75a8a0dd-e64d-4462-b8be-8006f6710653.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/75a8a0dd-e64d-4462-b8be-8006f6710653.json deleted file mode 100644 index 4e8e153d1ed178b5c4b61b03b4d37809352015b1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/75a8a0dd-e64d-4462-b8be-8006f6710653.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/1762652579.690311", - "retrieved_timestamp": "1762652579.690312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2608611816646498 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32980236442597805 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31679166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1651429521276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/8469a871-39e1-4b21-bb7c-fa21026a01ba.json 
b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/8469a871-39e1-4b21-bb7c-fa21026a01ba.json deleted file mode 100644 index 4918c73edb2b4ce384e2cfb8294a6b3b2cc7e5f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/8469a871-39e1-4b21-bb7c-fa21026a01ba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/1762652579.69052", - "retrieved_timestamp": "1762652579.690521", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2930347034756668 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3219547893625387 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3115833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1590757978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/046380aa-08bf-4d95-a4cc-bbfaf30eb56b.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/046380aa-08bf-4d95-a4cc-bbfaf30eb56b.json deleted file mode 100644 index 33440eb6c11054258011e9337fd553d77e63c402..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/046380aa-08bf-4d95-a4cc-bbfaf30eb56b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/1762652579.690735", - 
"retrieved_timestamp": "1762652579.690736", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28813880503730105 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32553831509236264 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31024999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15816156914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/fa8ee240-a7ac-4edc-9ac7-beabf38af0fa.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/fa8ee240-a7ac-4edc-9ac7-beabf38af0fa.json deleted file mode 100644 index bd63ff5d15bf1446a9e851c9b647295de4d2b672..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/fa8ee240-a7ac-4edc-9ac7-beabf38af0fa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/1762652579.690953", - "retrieved_timestamp": "1762652579.690954", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - 
"id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2887383254209941 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3237016212336586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31425 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16090425531914893 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/6d30ee72-d0ea-496d-8375-892968c8602e.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/6d30ee72-d0ea-496d-8375-892968c8602e.json deleted file mode 100644 index 400459f84dfcef3735d5b02306aff6131d026689..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/6d30ee72-d0ea-496d-8375-892968c8602e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/1762652579.691165", - "retrieved_timestamp": "1762652579.691166", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2905368865720732 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.3254390641560331 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3129166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15741356382978725 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/903b0e99-e50a-4afa-8085-1fd01872c048.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/903b0e99-e50a-4afa-8085-1fd01872c048.json deleted file mode 100644 index ca90d0f7e50450c087a6d0c59a2bb6a2657e9e10..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/903b0e99-e50a-4afa-8085-1fd01872c048.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/1762652579.691372", - "retrieved_timestamp": "1762652579.691373", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2904870188876625 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32381698216947513 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30894791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15915890957446807 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/225277d4-e1b9-4992-8e2d-678ac6157b06.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/225277d4-e1b9-4992-8e2d-678ac6157b06.json deleted file mode 100644 index 9d7e82cfddc7061f1e003d56321f016ab80a2483..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/225277d4-e1b9-4992-8e2d-678ac6157b06.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/1762652579.691587", - "retrieved_timestamp": "1762652579.691587", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23925406809487715 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3244192088381941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1573304521276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/4991436d-59fd-4f66-b588-9103beeeba5f.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/4991436d-59fd-4f66-b588-9103beeeba5f.json deleted file mode 100644 index 78cb2b0dfbee114473a623a3814e1cd084cf27de..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/4991436d-59fd-4f66-b588-9103beeeba5f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/1762652579.691787", - "retrieved_timestamp": "1762652579.691788", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24747226248576 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32090616030928304 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1566655585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/6118242a-de0a-4734-979d-86f2cc6fc65c.json 
b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/6118242a-de0a-4734-979d-86f2cc6fc65c.json deleted file mode 100644 index dc4847cab285fb664f9b9973265249762fc2bebf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/6118242a-de0a-4734-979d-86f2cc6fc65c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/1762652579.691988", - "retrieved_timestamp": "1762652579.691989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.232135179102559 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32779679775418075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14960106382978725 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/a6b71abf-7ee1-438b-8218-98803bca8de8.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/a6b71abf-7ee1-438b-8218-98803bca8de8.json deleted file mode 100644 index 3aa5ea362ecfffc65d160f2db559afc67d6cfa15..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/a6b71abf-7ee1-438b-8218-98803bca8de8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/1762652579.6921952", - "retrieved_timestamp": "1762652579.6921952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2541667220752049 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3253117533747236 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.318125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16090425531914893 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/f7fb8d6b-9773-42e7-a426-a35a401f689a.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/f7fb8d6b-9773-42e7-a426-a35a401f689a.json deleted file mode 100644 index dc6b2222709738a128011de32b832ccf559de503..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/f7fb8d6b-9773-42e7-a426-a35a401f689a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/1762652579.6924422", - "retrieved_timestamp": "1762652579.692443", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.273875539125077 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3245102552473828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3089166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15965757978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JungZoona_T3Q-qwen2.5-14b-v1.0-e3/eb7694ce-6fe4-4bb0-bcab-266ccc71f78a.json b/leaderboard_data/HFOpenLLMv2/alibaba/JungZoona_T3Q-qwen2.5-14b-v1.0-e3/eb7694ce-6fe4-4bb0-bcab-266ccc71f78a.json deleted file mode 100644 index 62a54f4dc13b7a5542e253e113d9422b23c1ec96..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/JungZoona_T3Q-qwen2.5-14b-v1.0-e3/eb7694ce-6fe4-4bb0-bcab-266ccc71f78a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JungZoona_T3Q-qwen2.5-14b-v1.0-e3/1762652579.697056", - "retrieved_timestamp": "1762652579.697057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JungZoona/T3Q-qwen2.5-14b-v1.0-e3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "JungZoona/T3Q-qwen2.5-14b-v1.0-e3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.732396707403024 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7585971930826706 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2862537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41694630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5911041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5884308510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Junhoee_Qwen-Megumin/0f231e27-deec-4b10-a995-d493ecf8400f.json b/leaderboard_data/HFOpenLLMv2/alibaba/Junhoee_Qwen-Megumin/0f231e27-deec-4b10-a995-d493ecf8400f.json deleted file mode 100644 index e8a7e939d9c2a65c75ed0673611a3c761865190f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Junhoee_Qwen-Megumin/0f231e27-deec-4b10-a995-d493ecf8400f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Junhoee_Qwen-Megumin/1762652579.69731", - "retrieved_timestamp": "1762652579.697311", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Junhoee/Qwen-Megumin", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Junhoee/Qwen-Megumin" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7141118897857683 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.528526812457251 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4901812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - 
} - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39803125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41988031914893614 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 15.231 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_Qwen2.5-0.5b-Test-ft/5a28540f-3a94-478c-84c0-5be8db86328a.json b/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_Qwen2.5-0.5b-Test-ft/5a28540f-3a94-478c-84c0-5be8db86328a.json deleted file mode 100644 index 358a02790d53afa76c90d25c3be0ffd8bdcf39a6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_Qwen2.5-0.5b-Test-ft/5a28540f-3a94-478c-84c0-5be8db86328a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/KingNish_Qwen2.5-0.5b-Test-ft/1762652579.699473", - "retrieved_timestamp": "1762652579.699473", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "KingNish/Qwen2.5-0.5b-Test-ft", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "KingNish/Qwen2.5-0.5b-Test-ft" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26708134416681073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3231533857529747 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.342125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16888297872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - 
"params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2.1/f12c6b15-107a-41ed-98fa-40b0af5be42e.json b/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2.1/f12c6b15-107a-41ed-98fa-40b0af5be42e.json deleted file mode 100644 index 8cdcbe08a3baa1827271395c3be26c84f802c812..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2.1/f12c6b15-107a-41ed-98fa-40b0af5be42e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued-v2.1/1762652579.700618", - "retrieved_timestamp": "1762652579.700619", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "KingNish/qwen-1b-continued-v2.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "KingNish/qwen-1b-continued-v2.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11268323603594019 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30416583041069006 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41539583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1278257978723404 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.277 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2.2/cf6aeb1a-4814-41ad-96f5-b59caafb902f.json b/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2.2/cf6aeb1a-4814-41ad-96f5-b59caafb902f.json deleted file mode 100644 index 2053439a294645b6e419e3926f39064168966b17..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2.2/cf6aeb1a-4814-41ad-96f5-b59caafb902f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued-v2.2/1762652579.7008262", - "retrieved_timestamp": "1762652579.700827", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "KingNish/qwen-1b-continued-v2.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "KingNish/qwen-1b-continued-v2.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14125963554479892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30586579449667844 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35130208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1262466755319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.277 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2/479d9f2a-82f6-42de-b8d6-92405f60638c.json b/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2/479d9f2a-82f6-42de-b8d6-92405f60638c.json deleted file mode 100644 index 71b1da5f0dd916bb628a3d49f36c8d263c3f4fba..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2/479d9f2a-82f6-42de-b8d6-92405f60638c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued-v2/1762652579.7004201", - "retrieved_timestamp": "1762652579.700421", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "KingNish/qwen-1b-continued-v2", - "developer": "alibaba", - "inference_platform": "unknown", - 
"id": "KingNish/qwen-1b-continued-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1578711153073844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31194932022650246 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33927083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11926529255319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.277 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued/a4063b77-fc24-4c9d-bf08-cb28fc6e8259.json b/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued/a4063b77-fc24-4c9d-bf08-cb28fc6e8259.json deleted file mode 100644 index 0338a38071be2fc3f59333d89df7ad4a1907b9c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued/a4063b77-fc24-4c9d-bf08-cb28fc6e8259.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued/1762652579.700214", - "retrieved_timestamp": "1762652579.700215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "KingNish/qwen-1b-continued", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "KingNish/qwen-1b-continued" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12547263483113694 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29909543894796364 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38587499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1260804521276596 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.277 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Kukedlc_Qwen-2.5-7b-Spanish-o1-CoT/c9a159fb-9e6b-49b3-8f2b-a2d2d3ca8f19.json b/leaderboard_data/HFOpenLLMv2/alibaba/Kukedlc_Qwen-2.5-7b-Spanish-o1-CoT/c9a159fb-9e6b-49b3-8f2b-a2d2d3ca8f19.json deleted file mode 100644 index 1a3edf10e0a39d64325b92d9ee580dd078a24161..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Kukedlc_Qwen-2.5-7b-Spanish-o1-CoT/c9a159fb-9e6b-49b3-8f2b-a2d2d3ca8f19.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Kukedlc_Qwen-2.5-7b-Spanish-o1-CoT/1762652579.703295", - "retrieved_timestamp": "1762652579.703295", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4210295349672203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5601947823443537 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726586102719033 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4776770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4363364361702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lambent_qwen2.5-reinstruct-alternate-lumen-14B/974e902e-0959-42d0-98f8-288e1a6ce887.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lambent_qwen2.5-reinstruct-alternate-lumen-14B/974e902e-0959-42d0-98f8-288e1a6ce887.json deleted file mode 100644 index cca382b4f0073666dc440f2076d7c3a93fb2c47f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lambent_qwen2.5-reinstruct-alternate-lumen-14B/974e902e-0959-42d0-98f8-288e1a6ce887.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lambent_qwen2.5-reinstruct-alternate-lumen-14B/1762652579.707211", - "retrieved_timestamp": "1762652579.707212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lambent/qwen2.5-reinstruct-alternate-lumen-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lambent/qwen2.5-reinstruct-alternate-lumen-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47938137475232384 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6458988582965893 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4622356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47700000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.538813164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/LenguajeNaturalAI_leniachat-qwen2-1.5B-v0/eb6e6d30-b349-447c-83d3-fe7760e83037.json b/leaderboard_data/HFOpenLLMv2/alibaba/LenguajeNaturalAI_leniachat-qwen2-1.5B-v0/eb6e6d30-b349-447c-83d3-fe7760e83037.json deleted file mode 100644 index e23f36edd740fe787608cd263cec9e14ef324c65..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/LenguajeNaturalAI_leniachat-qwen2-1.5B-v0/eb6e6d30-b349-447c-83d3-fe7760e83037.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LenguajeNaturalAI_leniachat-qwen2-1.5B-v0/1762652579.713998", - "retrieved_timestamp": "1762652579.713999", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LenguajeNaturalAI/leniachat-qwen2-1.5B-v0", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "LenguajeNaturalAI/leniachat-qwen2-1.5B-v0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22211842356059697 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36835590195612017 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3749895833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18799867021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.543 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v3/eb958d5c-aa2e-4640-bef7-c8b10a892847.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v3/eb958d5c-aa2e-4640-bef7-c8b10a892847.json deleted file mode 100644 index f2981832725c6713e51ff5f67a10fc5e76d81685..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v3/eb958d5c-aa2e-4640-bef7-c8b10a892847.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v3/1762652579.736984", - "retrieved_timestamp": "1762652579.7369852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7048697456083193 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6478481476573447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4161631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48075 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5393949468085106 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v4/17c5c728-e03d-45e9-aaae-816c4e90b14f.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v4/17c5c728-e03d-45e9-aaae-816c4e90b14f.json deleted file mode 100644 index 2b2e393a3f38a4f723493058bc5e4c11107e4273..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v4/17c5c728-e03d-45e9-aaae-816c4e90b14f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v4/1762652579.737248", - "retrieved_timestamp": "1762652579.7372491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6943033373670748 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6419880364363972 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3466767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.476875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5251828457446809 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v5/79d3d942-8d5f-4aca-8759-8d70b8cfc5f3.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v5/79d3d942-8d5f-4aca-8759-8d70b8cfc5f3.json deleted file mode 100644 index 1fabf9debc5deefbf10d725f5d8c1e17704456e2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v5/79d3d942-8d5f-4aca-8759-8d70b8cfc5f3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v5/1762652579.737468", - "retrieved_timestamp": "1762652579.737469", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.7485084021507378 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6466679318879384 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4473020833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5140458776595744 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/92bff089-baed-4f1f-852b-f274a7920a1a.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/92bff089-baed-4f1f-852b-f274a7920a1a.json deleted file mode 100644 index d5494427f7c5550369c0d06492a6c1b1b2044667..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/92bff089-baed-4f1f-852b-f274a7920a1a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/1762652579.7379", - "retrieved_timestamp": "1762652579.7379", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46634152936430895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6214839063250638 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.33157099697885195 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49373958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5204454787234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6/c4b27a1b-28dd-4a79-839c-ad8673034937.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6/c4b27a1b-28dd-4a79-839c-ad8673034937.json deleted file mode 100644 index 4a0184237be0dbcfb91e1ccfc15cd90930e64ec8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6/c4b27a1b-28dd-4a79-839c-ad8673034937.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6/1762652579.737686", - "retrieved_timestamp": "1762652579.737687", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.704320092909037 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6457646219275207 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3957703927492447 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3775167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47678125 - } - }, - 
{ - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5392287234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/46a21741-1860-4498-8284-c94fccad1ed0.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/46a21741-1860-4498-8284-c94fccad1ed0.json deleted file mode 100644 index f7a55eb21a7c7ab73a5b633ab4d79eaffe5f4cb4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/46a21741-1860-4498-8284-c94fccad1ed0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/1762652579.738374", - "retrieved_timestamp": "1762652579.7383769", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.693054428915278 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6422587980411637 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3406344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48881250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5276761968085106 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7/d540acde-9601-4119-8ae2-f7cdf82f43f7.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7/d540acde-9601-4119-8ae2-f7cdf82f43f7.json deleted file mode 100644 index 039af056d58570f201d445682d6f5a9d3e0d012f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7/d540acde-9601-4119-8ae2-f7cdf82f43f7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7/1762652579.738115", - "retrieved_timestamp": "1762652579.738116", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6793906833867471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.653127892154805 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4833854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5375664893617021 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.5/c723fc6f-2656-4084-81d0-4cbaf0587049.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.5/c723fc6f-2656-4084-81d0-4cbaf0587049.json deleted file mode 100644 index 881b5f514d56f442979ad06f184714a504097e11..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.5/c723fc6f-2656-4084-81d0-4cbaf0587049.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", 
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.5/1762652579.738977", - "retrieved_timestamp": "1762652579.7389781", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5928624937388352 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6451310724242122 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36555891238670696 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47696875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5290059840425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.6/526f6468-b7a8-47a7-9ed4-c2aa7cc63ca1.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.6/526f6468-b7a8-47a7-9ed4-c2aa7cc63ca1.json deleted file mode 100644 index f55560c5a9e25d40c27d2b87db7d8b25661b0d9a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.6/526f6468-b7a8-47a7-9ed4-c2aa7cc63ca1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.6/1762652579.7392142", - "retrieved_timestamp": "1762652579.7392151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5919382793210903 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6457173605698173 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070996978851964 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49532291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399767287234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.7/56232cf6-7ee7-45ed-b139-ea20e148b5fa.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.7/56232cf6-7ee7-45ed-b139-ea20e148b5fa.json deleted file mode 100644 index 17fbc40a7c6eccf1ea118d9bff069aadb6c3c47b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.7/56232cf6-7ee7-45ed-b139-ea20e148b5fa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.7/1762652579.7395148", - "retrieved_timestamp": "1762652579.739517", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7874761189200211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6482757721443902 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.524185505319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.8/51ff4f00-1d21-4f98-b5a3-7a72c4b2a5b1.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.8/51ff4f00-1d21-4f98-b5a3-7a72c4b2a5b1.json deleted file mode 100644 index f4a9cfa1c76e7f01304745fb5881dd070c99285c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.8/51ff4f00-1d21-4f98-b5a3-7a72c4b2a5b1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.8/1762652579.739795", - "retrieved_timestamp": "1762652579.739796", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7027963581075989 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6565626437486437 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42371601208459214 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy 
on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4911979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5323304521276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.9/eee0ebda-6ff8-45bd-ac4e-15aeb724d0d1.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.9/eee0ebda-6ff8-45bd-ac4e-15aeb724d0d1.json deleted file mode 100644 index 88ff0231d990d6b0b189f750b3c0716cbfb8cc4d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.9/eee0ebda-6ff8-45bd-ac4e-15aeb724d0d1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.9/1762652579.74003", - "retrieved_timestamp": "1762652579.740031", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7993413032974729 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6483097746745584 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370090634441088 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43282291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5199468085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8/b3e7af18-231e-4839-809c-bc5bfe7b4182.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8/b3e7af18-231e-4839-809c-bc5bfe7b4182.json deleted file mode 100644 index 54d8a94add53aee34f1f91d4b9f827293854ac48..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8/b3e7af18-231e-4839-809c-bc5bfe7b4182.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8/1762652579.738731", - "retrieved_timestamp": "1762652579.738732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7874761189200211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6419472828128271 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5558912386706949 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43936458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206117021276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/757269fe-8662-4eaa-8e76-5c2f88d8fbb0.json 
b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/757269fe-8662-4eaa-8e76-5c2f88d8fbb0.json deleted file mode 100644 index 3ed5139c44ddbcc1cb534736333f4e4b95dca93b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/757269fe-8662-4eaa-8e76-5c2f88d8fbb0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/1762652579.740509", - "retrieved_timestamp": "1762652579.74051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6513639365771708 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6570671029574323 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41842900302114805 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4819583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5412234042553191 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.1/dffd1a4a-a056-43c2-bda3-0cfa21406656.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.1/dffd1a4a-a056-43c2-bda3-0cfa21406656.json deleted file mode 100644 index 3abcf273a9571909bf875ab90b93a3523dcdea48..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.1/dffd1a4a-a056-43c2-bda3-0cfa21406656.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.1/1762652579.74074", - "retrieved_timestamp": "1762652579.740741", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8002655177152178 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6554749578648256 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5468277945619335 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43539583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5250997340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.2/b5ecb480-16e6-4dfb-be77-ad8ef4e90aa3.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.2/b5ecb480-16e6-4dfb-be77-ad8ef4e90aa3.json deleted file mode 100644 index a19c197d478950bd51db1c904f395347a75dfbbe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.2/b5ecb480-16e6-4dfb-be77-ad8ef4e90aa3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.2/1762652579.74097", - "retrieved_timestamp": "1762652579.74097", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2", 
- "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7862272104682243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6537693501484436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43809375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5283410904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9/682a38c6-2fb8-4c42-b6ad-69fbe65be484.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9/682a38c6-2fb8-4c42-b6ad-69fbe65be484.json deleted file mode 100644 index d8cc77e4192c46bae500c34881b2ef4f8d606d22..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9/682a38c6-2fb8-4c42-b6ad-69fbe65be484.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9/1762652579.740272", - "retrieved_timestamp": "1762652579.740273", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.523519816309614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6545588984302916 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43655589123867067 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4805625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.542220744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-OriginalFusion/cf14f098-cd46-4ca0-acec-02012eb78ea3.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-OriginalFusion/cf14f098-cd46-4ca0-acec-02012eb78ea3.json deleted file mode 100644 index 97958544db702c59d825de735bd273a307ed7a2b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-OriginalFusion/cf14f098-cd46-4ca0-acec-02012eb78ea3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-OriginalFusion/1762652579.741195", - "retrieved_timestamp": "1762652579.741195", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6141947809589667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6592166466793806 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.51215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5238530585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Marsouuu_MiniQwenMathExpert-ECE-PRYMMAL-Martial/f1b6c510-02fe-4ffd-96da-4cfcfb04eb8c.json b/leaderboard_data/HFOpenLLMv2/alibaba/Marsouuu_MiniQwenMathExpert-ECE-PRYMMAL-Martial/f1b6c510-02fe-4ffd-96da-4cfcfb04eb8c.json deleted file mode 100644 index ee067b6926092a91dbbfad2b8767933dfb262e4c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Marsouuu_MiniQwenMathExpert-ECE-PRYMMAL-Martial/f1b6c510-02fe-4ffd-96da-4cfcfb04eb8c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Marsouuu_MiniQwenMathExpert-ECE-PRYMMAL-Martial/1762652579.747411", - "retrieved_timestamp": "1762652579.747412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2794961812435449 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42301343044108936 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38673958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2922207446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_Qwen1.5-MoE-A2.7B-Wikihow/ee23e137-57d2-49aa-b267-27bd48457d46.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_Qwen1.5-MoE-A2.7B-Wikihow/ee23e137-57d2-49aa-b267-27bd48457d46.json deleted file mode 100644 index 3795e62912a1b802a0a755f05908ff273c1b1fe1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_Qwen1.5-MoE-A2.7B-Wikihow/ee23e137-57d2-49aa-b267-27bd48457d46.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Qwen1.5-MoE-A2.7B-Wikihow/1762652579.750923", - "retrieved_timestamp": "1762652579.750923", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29543278501043896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3920071454890602 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0823262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23803191489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 14.316 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2-72b/ae68a60d-a2df-45f1-b446-1400901cb6ff.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2-72b/ae68a60d-a2df-45f1-b446-1400901cb6ff.json deleted file mode 100644 
index 343294d935ac93df44c7d2ed4a5a55370aa3179f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2-72b/ae68a60d-a2df-45f1-b446-1400901cb6ff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-qwen2-72b/1762652579.75234", - "retrieved_timestamp": "1762652579.752341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.1-qwen2-72b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.1-qwen2-72b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8162774770941104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6965560971922596 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47321875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414727393617021 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.699 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2-7b/6c31df3b-e408-4a6c-b475-78f174630cad.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2-7b/6c31df3b-e408-4a6c-b475-78f174630cad.json deleted file mode 100644 index c1ffe918116e207d11e2f38774424344d0d21905..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2-7b/6c31df3b-e408-4a6c-b475-78f174630cad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-qwen2-7b/1762652579.752553", - "retrieved_timestamp": "1762652579.752554", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.1-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.1-qwen2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3816119008674761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045925887362795 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44369791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3692652925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2.5-72b/2b841a46-6210-4092-875f-ca3ae36f3d25.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2.5-72b/2b841a46-6210-4092-875f-ca3ae36f3d25.json deleted file mode 100644 index 0f2afb1eed516a9c7670c9e86ee5c531ecf438a6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2.5-72b/2b841a46-6210-4092-875f-ca3ae36f3d25.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-qwen2.5-72b/1762652579.752765", - "retrieved_timestamp": "1762652579.752765", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.1-qwen2.5-72b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.1-qwen2.5-72b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.8662360315075112 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7261624327092416 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5913897280966768 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42984375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5619182180851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.7 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2-72b/250897a9-7d48-4323-813d-fa48befe2cbe.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2-72b/250897a9-7d48-4323-813d-fa48befe2cbe.json deleted file mode 100644 index 59190bae2340fd7b7065891e4afae6b62307374d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2-72b/250897a9-7d48-4323-813d-fa48befe2cbe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-qwen2-72b/1762652579.753872", - "retrieved_timestamp": "1762652579.753872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.2-qwen2-72b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.2-qwen2-72b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8008151704145002 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6939595229335245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45317220543806647 - } - }, - 
{ - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4508020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.543467420212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2-7b/154b7a41-e1bf-4827-a6a7-279ea170ab7e.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2-7b/154b7a41-e1bf-4827-a6a7-279ea170ab7e.json deleted file mode 100644 index 0b96a7b144842a75da9955372b7435061fe9349f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2-7b/154b7a41-e1bf-4827-a6a7-279ea170ab7e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-qwen2-7b/1762652579.7540858", - "retrieved_timestamp": "1762652579.754087", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.2-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.2-qwen2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35972996094806226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5214913750127922 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43582291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3898769946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2.5-72b/1fa2ab02-9a1c-4e7e-95b8-27e78af0ba73.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2.5-72b/1fa2ab02-9a1c-4e7e-95b8-27e78af0ba73.json deleted file mode 100644 index b07673e588ffa080cd5a7923c0049d72b17b7817..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2.5-72b/1fa2ab02-9a1c-4e7e-95b8-27e78af0ba73.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-qwen2.5-72b/1762652579.754294", - "retrieved_timestamp": "1762652579.754294", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.2-qwen2.5-72b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.2-qwen2.5-72b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8476763875406145 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7276399007138082 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.561751994680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.7 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.3-qwen2-72b/8b769df2-18f5-4712-a02b-962d3e2bb7c7.json 
b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.3-qwen2-72b/8b769df2-18f5-4712-a02b-962d3e2bb7c7.json deleted file mode 100644 index 7bf9b81d70d4376668475b453d23bd5397f6c2c5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.3-qwen2-72b/8b769df2-18f5-4712-a02b-962d3e2bb7c7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-qwen2-72b/1762652579.755723", - "retrieved_timestamp": "1762652579.755724", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.3-qwen2-72b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.3-qwen2-72b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3849840645044039 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6576306700720502 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31722054380664655 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4112395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418882978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.3-qwen2-7b/3272e904-21d5-4116-abde-0e74fe48b9d5.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.3-qwen2-7b/3272e904-21d5-4116-abde-0e74fe48b9d5.json deleted file mode 100644 index fc36c6bb67fe0cf45c93b4ddacd9e16c81d2518d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.3-qwen2-7b/3272e904-21d5-4116-abde-0e74fe48b9d5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-qwen2-7b/1762652579.755967", - "retrieved_timestamp": "1762652579.755968", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.3-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.3-qwen2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3824862476008103 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5064049035932394 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20694864048338368 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4422395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3611203457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.4-qwen2-7b/5f54ee4a-42e8-4dd0-88bc-915d2f1971a0.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.4-qwen2-7b/5f54ee4a-42e8-4dd0-88bc-915d2f1971a0.json deleted file mode 100644 index 22fdb7227e2540238ed9c2adcb6a25feb1bb4c89..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.4-qwen2-7b/5f54ee4a-42e8-4dd0-88bc-915d2f1971a0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.4-qwen2-7b/1762652579.756743", - "retrieved_timestamp": "1762652579.756744", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.4-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.4-qwen2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32995452067181746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5101416326251771 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44528125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3976894946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.5-qwen2-7b/762f6ff3-4823-4de8-8351-045e1d1d383b.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.5-qwen2-7b/762f6ff3-4823-4de8-8351-045e1d1d383b.json deleted file mode 100644 index f4eae59e411990062c59c024bb9d57a764d0da1a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.5-qwen2-7b/762f6ff3-4823-4de8-8351-045e1d1d383b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.5-qwen2-7b/1762652579.757269", - "retrieved_timestamp": "1762652579.75727", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.5-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.5-qwen2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31449221399220734 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4886561146965678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2258308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45646875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681848404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.6-qwen2-7b/65f44cf9-f619-4f43-a03f-09e22386d319.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.6-qwen2-7b/65f44cf9-f619-4f43-a03f-09e22386d319.json deleted file mode 100644 index 91fe3fabbc34ca65843fb1ef701b9fda8e7fb5e2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.6-qwen2-7b/65f44cf9-f619-4f43-a03f-09e22386d319.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.6-qwen2-7b/1762652579.7575328", - "retrieved_timestamp": "1762652579.757534", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.6-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.6-qwen2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3442676542684522 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4930243946403894 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2843959731543625 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.4586145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3731715425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.7-qwen2-7b/f592bc27-c97c-4b14-abcf-30782d8c0056.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.7-qwen2-7b/f592bc27-c97c-4b14-abcf-30782d8c0056.json deleted file mode 100644 index caf6ae62321cf8d58fc6ba4bc18db6ffdb84676e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.7-qwen2-7b/f592bc27-c97c-4b14-abcf-30782d8c0056.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.7-qwen2-7b/1762652579.757804", - "retrieved_timestamp": "1762652579.757805", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.7-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.7-qwen2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3592301759331906 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4883170901309997 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48242708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3705119680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/Minami-su_Amara-o1-7B-Qwen/6910eff9-74bc-46b0-8f8c-20642bef4a12.json b/leaderboard_data/HFOpenLLMv2/alibaba/Minami-su_Amara-o1-7B-Qwen/6910eff9-74bc-46b0-8f8c-20642bef4a12.json deleted file mode 100644 index ac866ee38f52100f0ea016e4c77f969687c651cb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Minami-su_Amara-o1-7B-Qwen/6910eff9-74bc-46b0-8f8c-20642bef4a12.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Minami-su_Amara-o1-7B-Qwen/1762652579.759999", - "retrieved_timestamp": "1762652579.76", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Minami-su/Amara-o1-7B-Qwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Minami-su/Amara-o1-7B-Qwen" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7389914316236474 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5199420077880453 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5181268882175226 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40066666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4083277925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Minami-su_Amara-o2-7B-Qwen/ebd5da9f-60d5-492e-916b-5e123442316c.json b/leaderboard_data/HFOpenLLMv2/alibaba/Minami-su_Amara-o2-7B-Qwen/ebd5da9f-60d5-492e-916b-5e123442316c.json deleted file mode 100644 index 574134d0b3214f5a135b36507d6aa96238622e1c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Minami-su_Amara-o2-7B-Qwen/ebd5da9f-60d5-492e-916b-5e123442316c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Minami-su_Amara-o2-7B-Qwen/1762652579.760268", - "retrieved_timestamp": "1762652579.760268", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Minami-su/Amara-o2-7B-Qwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Minami-su/Amara-o2-7B-Qwen" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7146615424850509 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173432604435285 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4086102719033233 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37809374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41647273936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Nexesenex_Qwen_2.5_3b_Smarteaz_0.01a/eaf601d2-f285-4b0c-b3ab-5d029b8fe20f.json b/leaderboard_data/HFOpenLLMv2/alibaba/Nexesenex_Qwen_2.5_3b_Smarteaz_0.01a/eaf601d2-f285-4b0c-b3ab-5d029b8fe20f.json deleted file mode 100644 index 9be688bb7726b425899c95d4c1cc3da56c33b74d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Nexesenex_Qwen_2.5_3b_Smarteaz_0.01a/eaf601d2-f285-4b0c-b3ab-5d029b8fe20f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Qwen_2.5_3b_Smarteaz_0.01a/1762652579.782197", - "retrieved_timestamp": "1762652579.782198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4011954946209391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4636652015725344 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1805135951661631 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43204166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2859873670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/NikolaSigmoid_DeepSeek-R1-Distill-Qwen-1.5B-500/c0182d01-454b-4194-be7a-81b9a9672d07.json b/leaderboard_data/HFOpenLLMv2/alibaba/NikolaSigmoid_DeepSeek-R1-Distill-Qwen-1.5B-500/c0182d01-454b-4194-be7a-81b9a9672d07.json deleted file mode 100644 index 26f772ddb0ffa31e7cc5458b3920326a44fca053..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/NikolaSigmoid_DeepSeek-R1-Distill-Qwen-1.5B-500/c0182d01-454b-4194-be7a-81b9a9672d07.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NikolaSigmoid_DeepSeek-R1-Distill-Qwen-1.5B-500/1762652579.783665", - "retrieved_timestamp": "1762652579.783666", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17485715678843247 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2601595454586609 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1124501329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.157 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/PJMixers-Dev_Qwen2.5-RomboTiesTest-7B/a954be32-0c84-4ffe-9c4f-7f895c77e197.json b/leaderboard_data/HFOpenLLMv2/alibaba/PJMixers-Dev_Qwen2.5-RomboTiesTest-7B/a954be32-0c84-4ffe-9c4f-7f895c77e197.json deleted file mode 100644 index ad31c0a691bc7eb520f3bbd617e928aedfef9c4f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/PJMixers-Dev_Qwen2.5-RomboTiesTest-7B/a954be32-0c84-4ffe-9c4f-7f895c77e197.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_Qwen2.5-RomboTiesTest-7B/1762652579.811478", - "retrieved_timestamp": "1762652579.81148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PJMixers-Dev/Qwen2.5-RomboTiesTest-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "PJMixers-Dev/Qwen2.5-RomboTiesTest-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7558023821238757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5398673461520839 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4033645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4285239361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.808 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Pinkstack_PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/7b8f75d1-ef18-4fb4-abbb-efd6147fe74c.json b/leaderboard_data/HFOpenLLMv2/alibaba/Pinkstack_PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/7b8f75d1-ef18-4fb4-abbb-efd6147fe74c.json deleted file mode 100644 index 4453b50609130a7b6176fbc4642f129806799cef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Pinkstack_PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/7b8f75d1-ef18-4fb4-abbb-efd6147fe74c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Pinkstack_PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/1762652579.812139", - "retrieved_timestamp": "1762652579.812139", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084819390328772 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47105662040096935 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44785416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35106382978723405 - } - } - ], - "additional_details": { - "precision": 
"float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_QwQ-32B-Preview/1326f0c0-9355-47ff-813b-0729370e1487.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_QwQ-32B-Preview/1326f0c0-9355-47ff-813b-0729370e1487.json deleted file mode 100644 index 7ce95fc928c58d1aa3d40871098518a69102c832..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_QwQ-32B-Preview/1326f0c0-9355-47ff-813b-0729370e1487.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_QwQ-32B-Preview/1762652579.834909", - "retrieved_timestamp": "1762652579.83491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/QwQ-32B-Preview", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/QwQ-32B-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4035437084713006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6691381482252744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44939577039274925 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2818791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4109895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5678191489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_QwQ-32B/788241ad-d975-498e-80ef-b0d04bd8db85.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_QwQ-32B/788241ad-d975-498e-80ef-b0d04bd8db85.json deleted file mode 100644 index c8c60953c0554741ac5f932a70dd3afd5b6b45d6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_QwQ-32B/788241ad-d975-498e-80ef-b0d04bd8db85.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_QwQ-32B/1762652579.8346298", - 
"retrieved_timestamp": "1762652579.834631", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/QwQ-32B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/QwQ-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39767372793077926 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29829653176003074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1608761329305136 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42063541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11959773936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-0.5B/e0115d6b-3b2c-4047-b64c-1e7afb5edd55.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-0.5B/e0115d6b-3b2c-4047-b64c-1e7afb5edd55.json deleted file mode 100644 index 1a1b64ba364ea9ba1fb81a6b45d5ed5d8f108fd4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-0.5B/e0115d6b-3b2c-4047-b64c-1e7afb5edd55.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-0.5B/1762652579.835391", - "retrieved_timestamp": "1762652579.835392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-0.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-0.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17056077873375977 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3153538659142558 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36162500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1307347074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.62 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-1.8B/7c828833-fd36-4a84-8530-d3c1769ca822.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-1.8B/7c828833-fd36-4a84-8530-d3c1769ca822.json deleted file mode 100644 index 5ad9183697a67d07f794cb5a54045cea14be46c6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-1.8B/7c828833-fd36-4a84-8530-d3c1769ca822.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-1.8B/1762652579.835954", - "retrieved_timestamp": "1762652579.835955", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-1.8B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-1.8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2154239639711521 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3476121558366305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36051041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18816489361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.837 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-110B/29389e2b-7898-4f9f-ba8c-8fe4dad80295.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-110B/29389e2b-7898-4f9f-ba8c-8fe4dad80295.json deleted file mode 100644 index 036719a30fd863e9bb485070578bfaec0de2660d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-110B/29389e2b-7898-4f9f-ba8c-8fe4dad80295.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-110B/1762652579.836433", - "retrieved_timestamp": "1762652579.836434", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-110B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-110B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3421942667677318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6099964981780978 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24697885196374622 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44084375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5360704787234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 111.21 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-14B/9afcb068-65e2-4d4c-b7ee-071eb4dbac73.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-14B/9afcb068-65e2-4d4c-b7ee-071eb4dbac73.json deleted file mode 100644 index c008ac93017c8c20597b9cf1c2e00b710ff43224..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-14B/9afcb068-65e2-4d4c-b7ee-071eb4dbac73.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-14B/1762652579.836853", - "retrieved_timestamp": "1762652579.836853", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2905368865720732 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5080327493808331 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41864583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36436170212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.167 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-32B/b8cd9221-dd4e-4f49-b03e-f11bdd5773e4.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-32B/b8cd9221-dd4e-4f49-b03e-f11bdd5773e4.json deleted file mode 100644 index 19bcf18a29eb0253479ad539b69339710488e8db..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-32B/b8cd9221-dd4e-4f49-b03e-f11bdd5773e4.json +++ 
/dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-32B/1762652579.837265", - "retrieved_timestamp": "1762652579.837266", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-32B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.329729562006587 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5715390555959325 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028700906344411 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4277916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4499667553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.512 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-4B/1e3f60f2-814a-4979-87bd-f5f94d5b09cc.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-4B/1e3f60f2-814a-4979-87bd-f5f94d5b09cc.json deleted file mode 100644 index 78e6eea27607ea2d8145b5a964fa187fb8c556eb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-4B/1e3f60f2-814a-4979-87bd-f5f94d5b09cc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-4B/1762652579.837696", - "retrieved_timestamp": "1762652579.837697", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-4B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-4B" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24447466056729478 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40538970296725463 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3604479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24601063829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.95 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-7B/102378fc-7b98-4088-a6f5-3039e7b638d5.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-7B/102378fc-7b98-4088-a6f5-3039e7b638d5.json deleted file mode 100644 index 9eba02630146ba83681112de58a4b21a556e1f4d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-7B/102378fc-7b98-4088-a6f5-3039e7b638d5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-7B/1762652579.838115", - "retrieved_timestamp": "1762652579.8381162", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684299879874289 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4559896407693445 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4103333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29163896276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.721 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-MoE-A2.7B/c6aa0ed8-3b79-4d73-8587-762e9469f4ce.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-MoE-A2.7B/c6aa0ed8-3b79-4d73-8587-762e9469f4ce.json deleted file mode 100644 index 555eb3cfe436fac2c095f2201adb511d1fc2f669..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-MoE-A2.7B/c6aa0ed8-3b79-4d73-8587-762e9469f4ce.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-MoE-A2.7B/1762652579.83854", - "retrieved_timestamp": "1762652579.83854", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen1.5-MoE-A2.7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen1.5-MoE-A2.7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.265982038768246 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4113515433010766 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40134375000000005 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2777593085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 14.316 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-0.5B/cdf3b683-29d9-45b4-b6a6-1f67927ef953.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-0.5B/cdf3b683-29d9-45b4-b6a6-1f67927ef953.json deleted file mode 100644 index 4f3d5e4dec002be83a5bc8a95d25b1acb3cdc3bd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-0.5B/cdf3b683-29d9-45b4-b6a6-1f67927ef953.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-0.5B/1762652579.838974", - "retrieved_timestamp": "1762652579.838975", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2-0.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-0.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18732186154957736 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3239117424825444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37520833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17195811170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-1.5B/6eb76673-0633-440b-8849-8fcf8cf00954.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-1.5B/6eb76673-0633-440b-8849-8fcf8cf00954.json deleted file mode 100644 index 
c0d00d0d675b637a3328ee4cf2b6d13fe4225c9c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-1.5B/6eb76673-0633-440b-8849-8fcf8cf00954.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-1.5B/1762652579.839384", - "retrieved_timestamp": "1762652579.839385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2-1.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-1.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21132705665412216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35747931720577464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36581250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2551529255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-57B-A14B/aafb84cd-5950-4b93-98d1-9e50fd294b65.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-57B-A14B/aafb84cd-5950-4b93-98d1-9e50fd294b65.json deleted file mode 100644 index af612061002c5dcf79ec55d0fb5b92a3c199e611..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-57B-A14B/aafb84cd-5950-4b93-98d1-9e50fd294b65.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-57B-A14B/1762652579.8398201", - "retrieved_timestamp": "1762652579.839821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2-57B-A14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-57B-A14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31126965340851165 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5618204938684165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1865558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4916057180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 57.409 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-72B/fc683e1a-327f-4a69-bd51-9022c587159b.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-72B/fc683e1a-327f-4a69-bd51-9022c587159b.json deleted file mode 100644 index f5d39d30b22c8943069abe4cf885afbf1451c303..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-72B/fc683e1a-327f-4a69-bd51-9022c587159b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-72B/1762652579.8402402", - "retrieved_timestamp": "1762652579.840241", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2-72B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-72B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3823610243044012 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.661734029856643 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47036458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5730551861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-7B/196e965c-4570-43aa-ba0d-13972796bda9.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-7B/196e965c-4570-43aa-ba0d-13972796bda9.json deleted file mode 100644 index 57c3b27a6fdc0fbf158f41ba9a371d6e1509ae30..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-7B/196e965c-4570-43aa-ba0d-13972796bda9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-7B/1762652579.840696", - "retrieved_timestamp": "1762652579.840696", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3148667757106699 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.531531595001889 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4439166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41830119680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-Math-7B/fe474496-4efa-4ef7-844d-32b17abda7c8.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-Math-7B/fe474496-4efa-4ef7-844d-32b17abda7c8.json deleted file mode 100644 index be65f27cf72ff35f3b14c9568cdafbebacf39366..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-Math-7B/fe474496-4efa-4ef7-844d-32b17abda7c8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-Math-7B/1762652579.841364", - "retrieved_timestamp": "1762652579.841364", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2-Math-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2-Math-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2687048143370701 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.386954741074792 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24773413897280966 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35933333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1196808510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-0.5B/c8110747-f2dd-46d0-b2b3-706d70e1d714.json 
b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-0.5B/c8110747-f2dd-46d0-b2b3-706d70e1d714.json deleted file mode 100644 index c93fe847112db9442f5a4d9d9336e6fd9534812d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-0.5B/c8110747-f2dd-46d0-b2b3-706d70e1d714.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-0.5B/1762652579.841982", - "retrieved_timestamp": "1762652579.841983", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-0.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-0.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16271714606133947 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32748148151196615 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3433333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19057513297872342 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.5 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-1.5B/9982c576-75fd-47f6-8fe9-52b56fc58d3f.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-1.5B/9982c576-75fd-47f6-8fe9-52b56fc58d3f.json deleted file mode 100644 index 41839d9fb82ca335770bfc9cc778c0f3adab0059..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-1.5B/9982c576-75fd-47f6-8fe9-52b56fc58d3f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-1.5B/1762652579.8426108", - "retrieved_timestamp": "1762652579.842612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", 
- "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-1.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-1.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26743041795768563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40779509451366147 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667674 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35759375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28548869680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.5 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-14B/b02dabaf-2aac-468d-b0cc-c7194c2094fd.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-14B/b02dabaf-2aac-468d-b0cc-c7194c2094fd.json deleted file mode 100644 index dc97cd17878b7b7f4d10309d78c273bf88476edc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-14B/b02dabaf-2aac-468d-b0cc-c7194c2094fd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-14B/1762652579.843051", - "retrieved_timestamp": "1762652579.8430521", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3694464022127954 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.616051493531774 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29003021148036257 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4502395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5248503989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-32B/9dd61039-27d0-42f3-9b03-65b0a59465d4.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-32B/9dd61039-27d0-42f3-9b03-65b0a59465d4.json deleted file mode 100644 index a5e6dabfc7f2f380837adee7b322537f4bb7d71e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-32B/9dd61039-27d0-42f3-9b03-65b0a59465d4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-32B/1762652579.843701", - "retrieved_timestamp": "1762652579.843702", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-32B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40766499554515356 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6770522448726507 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564954682779456 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41191275167785235 
- } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49783333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5805352393617021 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-3B/43062e28-5532-4e31-ac49-fbd794c7f664.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-3B/43062e28-5532-4e31-ac49-fbd794c7f664.json deleted file mode 100644 index 08e0a42e327034da3fee59ef4b52f6cb519878ac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-3B/43062e28-5532-4e31-ac49-fbd794c7f664.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-3B/1762652579.8441322", - "retrieved_timestamp": "1762652579.8441331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-3B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2689541527591236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4612475341011634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3203125 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff 
--git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-72B/89ce1911-289d-40bb-be48-f9a4d8d73ac2.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-72B/89ce1911-289d-40bb-be48-f9a4d8d73ac2.json deleted file mode 100644 index 2529499f5052322c08fb97dcd290a1d4f4d5b7ad..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-72B/89ce1911-289d-40bb-be48-f9a4d8d73ac2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-72B/1762652579.844565", - "retrieved_timestamp": "1762652579.844566", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-72B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-72B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4137100670664947 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6797320670694852 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39123867069486407 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4052013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.477125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5968251329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-7B/bed92e1c-8f11-4f70-826e-569aa55baa09.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-7B/bed92e1c-8f11-4f70-826e-569aa55baa09.json deleted file mode 100644 index 1de74221afd75c93eef4897f590d34affa9cabae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-7B/bed92e1c-8f11-4f70-826e-569aa55baa09.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-7B/1762652579.8449879", - "retrieved_timestamp": "1762652579.8449888", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3374479713825982 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5416303767788616 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25075528700906347 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4424270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4365026595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-14B/d0ae041c-8b56-4ce1-841b-96622a724894.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-14B/d0ae041c-8b56-4ce1-841b-96622a724894.json deleted file mode 100644 index db4ce938c45b7ecdf0c9feec74ab07ebcb64ad23..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-14B/d0ae041c-8b56-4ce1-841b-96622a724894.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-14B/1762652579.8457868", - "retrieved_timestamp": "1762652579.845789", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Coder-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Coder-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3472652561869174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5864860091741232 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22507552870090636 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4521276595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-32B/743c517a-ad0f-495d-b9d0-cdca01335933.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-32B/743c517a-ad0f-495d-b9d0-cdca01335933.json deleted file mode 100644 index ae46c4b9c63a6b18f6d59da1c0f7d8a150882d1c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-32B/743c517a-ad0f-495d-b9d0-cdca01335933.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-32B/1762652579.846424", - "retrieved_timestamp": "1762652579.846425", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Coder-32B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Coder-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4363411304228336 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.640395506550809 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30891238670694865 - } - }, - 
{ - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4528125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302526595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-7B/5e82cb32-8291-497b-ac56-16b50947d1bf.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-7B/5e82cb32-8291-497b-ac56-16b50947d1bf.json deleted file mode 100644 index 13a7a94b0c6ed039295f5765dcfda8a122e0ca60..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-7B/5e82cb32-8291-497b-ac56-16b50947d1bf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-7B/1762652579.846894", - "retrieved_timestamp": "1762652579.8468952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Coder-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Coder-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.344592348302504 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48556405534214747 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3448541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3679355053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Math-7B/8fddcebe-58d2-4d40-8147-f02feabc0d9c.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Math-7B/8fddcebe-58d2-4d40-8147-f02feabc0d9c.json deleted file mode 100644 index 87c4a63febf719d3870050fb0a4e6c7d74842f8b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Math-7B/8fddcebe-58d2-4d40-8147-f02feabc0d9c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-7B/1762652579.8480499", - "retrieved_timestamp": "1762652579.848052", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Qwen/Qwen2.5-Math-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Qwen/Qwen2.5-Math-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24599839536873275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4454639372840941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30513595166163143 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37809374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27177526595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/RESMPDEV_EVA-Qwen2.5-1.5B-FRFR/648e69e2-54de-43c4-93ac-f8422fa4b9c1.json b/leaderboard_data/HFOpenLLMv2/alibaba/RESMPDEV_EVA-Qwen2.5-1.5B-FRFR/648e69e2-54de-43c4-93ac-f8422fa4b9c1.json deleted file mode 100644 index 85bbbdfcd01fa453b44fcdbd1044c0fb73e493ce..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/alibaba/RESMPDEV_EVA-Qwen2.5-1.5B-FRFR/648e69e2-54de-43c4-93ac-f8422fa4b9c1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/RESMPDEV_EVA-Qwen2.5-1.5B-FRFR/1762652579.848896", - "retrieved_timestamp": "1762652579.848896", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "RESMPDEV/EVA-Qwen2.5-1.5B-FRFR", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "RESMPDEV/EVA-Qwen2.5-1.5B-FRFR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.308172316121225 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3932411333682871 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3539375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27701130319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/RESMPDEV_Qwen2-Wukong-0.5B/72a11594-1d83-4e12-b82f-137b6749f5ab.json b/leaderboard_data/HFOpenLLMv2/alibaba/RESMPDEV_Qwen2-Wukong-0.5B/72a11594-1d83-4e12-b82f-137b6749f5ab.json deleted file mode 100644 index f547bc17029e71c8feb4bb2ea2bde7d4b3d14219..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/RESMPDEV_Qwen2-Wukong-0.5B/72a11594-1d83-4e12-b82f-137b6749f5ab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/RESMPDEV_Qwen2-Wukong-0.5B/1762652579.849144", - "retrieved_timestamp": "1762652579.849144", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "RESMPDEV/Qwen2-Wukong-0.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "RESMPDEV/Qwen2-Wukong-0.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1854235650296768 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.308451428837168 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23657718120805368 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3524791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13272938829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-Coder-Qwen2-1.5b/1ff6b76b-7241-4f06-9db5-4594d3ff7a3f.json b/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-Coder-Qwen2-1.5b/1ff6b76b-7241-4f06-9db5-4594d3ff7a3f.json deleted file mode 100644 index 8b3f7e8d85b43cc1a8deb83b392a547c9c20d59b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-Coder-Qwen2-1.5b/1ff6b76b-7241-4f06-9db5-4594d3ff7a3f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-Coder-Qwen2-1.5b/1762652579.852138", - "retrieved_timestamp": "1762652579.852139", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Replete-AI/Replete-Coder-Qwen2-1.5b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Replete-AI/Replete-Coder-Qwen2-1.5b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30142798884736943 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34747295666696026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4072708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21467752659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b/20a6e090-2c78-4eb9-870e-9abbcbada6f9.json b/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b/20a6e090-2c78-4eb9-870e-9abbcbada6f9.json deleted file mode 100644 index 75f4996fe9c1fecbffe305dc34a762a56b7e8f51..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b/20a6e090-2c78-4eb9-870e-9abbcbada6f9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-Qwen2-7b/1762652579.852611", - "retrieved_timestamp": "1762652579.852612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Replete-AI/Replete-LLM-Qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Replete-AI/Replete-LLM-Qwen2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09324813716494457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2976924067792704 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39409374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11569148936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b/a846978d-de78-48e8-a738-54c732e50c28.json b/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b/a846978d-de78-48e8-a738-54c732e50c28.json deleted file mode 100644 index 69b71649b30681745e57709c80259371e8690e92..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b/a846978d-de78-48e8-a738-54c732e50c28.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-Qwen2-7b/1762652579.8524", - "retrieved_timestamp": "1762652579.8524008", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Replete-AI/Replete-LLM-Qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Replete-AI/Replete-LLM-Qwen2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09047549391170981 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29852574011260374 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38476041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.1157746010638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b_Beta-Preview/4977e0d5-1446-41ba-b00b-e8236c896d2e.json b/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b_Beta-Preview/4977e0d5-1446-41ba-b00b-e8236c896d2e.json deleted file mode 100644 index 7fc8bd94e8efbdad4bf7834f87e1ebde8f0d23bc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b_Beta-Preview/4977e0d5-1446-41ba-b00b-e8236c896d2e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-Qwen2-7b_Beta-Preview/1762652579.852791", - "retrieved_timestamp": "1762652579.852791", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08575468645416384 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2929321328066677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3980625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1284906914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Rombo-Org_Rombo-LLM-V2.5-Qwen-7b/8713e6fb-8843-43f2-af3b-57a59d326670.json b/leaderboard_data/HFOpenLLMv2/alibaba/Rombo-Org_Rombo-LLM-V2.5-Qwen-7b/8713e6fb-8843-43f2-af3b-57a59d326670.json deleted file mode 100644 index 
0a9bc0f5a1c7467644a108859c397f2324ed0442..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Rombo-Org_Rombo-LLM-V2.5-Qwen-7b/8713e6fb-8843-43f2-af3b-57a59d326670.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Rombo-Org_Rombo-LLM-V2.5-Qwen-7b/1762652579.854495", - "retrieved_timestamp": "1762652579.854495", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Rombo-Org/Rombo-LLM-V2.5-Qwen-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Rombo-Org/Rombo-LLM-V2.5-Qwen-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.748183708116686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399745025607596 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.506797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39803125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282746010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Sakalti_QwenTest-7/2d99163e-9ebd-49d9-ad13-ee1f780d277c.json b/leaderboard_data/HFOpenLLMv2/alibaba/Sakalti_QwenTest-7/2d99163e-9ebd-49d9-ad13-ee1f780d277c.json deleted file mode 100644 index 1c770c304923329889a7db1c4877c37363742f37..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Sakalti_QwenTest-7/2d99163e-9ebd-49d9-ad13-ee1f780d277c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_QwenTest-7/1762652579.8585348", - "retrieved_timestamp": "1762652579.858536", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/QwenTest-7", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Sakalti/QwenTest-7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16718861509683197 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3063209532879154 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34218750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12117686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.988 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Sakalti_qwen2.5-2.3B/6dc5b101-c681-4010-941a-3983cb9eff53.json b/leaderboard_data/HFOpenLLMv2/alibaba/Sakalti_qwen2.5-2.3B/6dc5b101-c681-4010-941a-3983cb9eff53.json deleted file mode 100644 index 8d836bb50d09e1e3856543ebac0274feed63781e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Sakalti_qwen2.5-2.3B/6dc5b101-c681-4010-941a-3983cb9eff53.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_qwen2.5-2.3B/1762652579.869403", - "retrieved_timestamp": "1762652579.8694038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/qwen2.5-2.3B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Sakalti/qwen2.5-2.3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12879493078365403 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2849449123234445 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38565625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11727061170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2Model", - "params_billions": 2.339 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Impish_QWEN_14B-1M/a059e151-6f32-48ff-900b-4e232aef3cc0.json b/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Impish_QWEN_14B-1M/a059e151-6f32-48ff-900b-4e232aef3cc0.json deleted file mode 100644 index 3431e55f2d551f88cb9090bf3bddde8827ce1d43..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Impish_QWEN_14B-1M/a059e151-6f32-48ff-900b-4e232aef3cc0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_QWEN_14B-1M/1762652579.8825831", - "retrieved_timestamp": "1762652579.882584", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/Impish_QWEN_14B-1M", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Impish_QWEN_14B-1M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7867768631675067 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6282934814011238 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46146875000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504404920212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Impish_QWEN_7B-1M/64c02fd8-386d-4b4c-bc00-d243cfcae7f1.json b/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Impish_QWEN_7B-1M/64c02fd8-386d-4b4c-bc00-d243cfcae7f1.json deleted file mode 100644 index f87cd61b6a4666b56ec2b1f0610a249f689f8069..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Impish_QWEN_7B-1M/64c02fd8-386d-4b4c-bc00-d243cfcae7f1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_QWEN_7B-1M/1762652579.8828428", - "retrieved_timestamp": "1762652579.882844", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/Impish_QWEN_7B-1M", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Impish_QWEN_7B-1M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6381744881359238 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.537172912933626 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30891238670694865 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40739583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.4265292553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncencored/7c6f4fa2-6847-4f57-8a8f-31673bd8b1e7.json b/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncencored/7c6f4fa2-6847-4f57-8a8f-31673bd8b1e7.json deleted file mode 100644 index 171ef49447290f41bcd1e7bfc8cbdb85feb50ade..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncencored/7c6f4fa2-6847-4f57-8a8f-31673bd8b1e7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Qwen2.5-14B_Uncencored/1762652579.883748", - "retrieved_timestamp": "1762652579.883749", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/Qwen2.5-14B_Uncencored", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncencored" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31579099012841483 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6308941945507827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31797583081570996 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45166666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.526595744680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncensored/ea18a046-87bb-42d9-a1b2-d01fe875c970.json b/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncensored/ea18a046-87bb-42d9-a1b2-d01fe875c970.json deleted file mode 100644 
index 6187f4b33568e3764766dea2f9d3e270d63275b1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncensored/ea18a046-87bb-42d9-a1b2-d01fe875c970.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Qwen2.5-14B_Uncensored/1762652579.883949", - "retrieved_timestamp": "1762652579.88395", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3173147249298528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6308941945507827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31797583081570996 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45166666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.526595744680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncensored_Instruct/8012de5a-8cb0-4039-895f-70c20e9237ee.json b/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncensored_Instruct/8012de5a-8cb0-4039-895f-70c20e9237ee.json deleted file mode 100644 index efe8de8bf6792e56afc7e5e426a47eb7b0d8c533..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncensored_Instruct/8012de5a-8cb0-4039-895f-70c20e9237ee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Qwen2.5-14B_Uncensored_Instruct/1762652579.884166", - "retrieved_timestamp": "1762652579.884167", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3789389929830627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5936792404117958 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3285498489425982 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36965625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5127160904255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/StelleX_Qwen2.5_Math_7B_Cot/a0802c61-1314-4a46-9b61-7a89246bac42.json b/leaderboard_data/HFOpenLLMv2/alibaba/StelleX_Qwen2.5_Math_7B_Cot/a0802c61-1314-4a46-9b61-7a89246bac42.json deleted file mode 100644 index 3fb322cdd9d65b78208932642fd1ae6cf47309cf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/StelleX_Qwen2.5_Math_7B_Cot/a0802c61-1314-4a46-9b61-7a89246bac42.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/StelleX_Qwen2.5_Math_7B_Cot/1762652579.8928509", - "retrieved_timestamp": "1762652579.892852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "StelleX/Qwen2.5_Math_7B_Cot", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "StelleX/Qwen2.5_Math_7B_Cot" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2142747908881767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4312922433417096 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39241666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.281000664893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/T145_qwen-2.5-3B-merge-test/071d7565-90e5-43e8-a158-ab333beacdcf.json b/leaderboard_data/HFOpenLLMv2/alibaba/T145_qwen-2.5-3B-merge-test/071d7565-90e5-43e8-a158-ab333beacdcf.json deleted file mode 100644 index 3c58ab0622bc6193593c22771662c832a0e405dc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/T145_qwen-2.5-3B-merge-test/071d7565-90e5-43e8-a158-ab333beacdcf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_qwen-2.5-3B-merge-test/1762652579.908712", - "retrieved_timestamp": "1762652579.9087129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/qwen-2.5-3B-merge-test", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "T145/qwen-2.5-3B-merge-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5751018408932742 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4842488747720393 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3202416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40072916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289561170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-7B-Ins-Rule/7621e05b-1b5e-43e5-a65c-322334575e68.json b/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-7B-Ins-Rule/7621e05b-1b5e-43e5-a65c-322334575e68.json deleted file mode 100644 index c272d8b61859419ba8c472ffe964ef6838982547..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-7B-Ins-Rule/7621e05b-1b5e-43e5-a65c-322334575e68.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCoder-Qwen2.5-7B-Ins-Rule/1762652579.910362", - "retrieved_timestamp": "1762652579.910363", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.742413462944986 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5404426673547671 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49924471299093653 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39803125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4321808510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Base-Rule/f6223009-028e-4063-90ce-e008a3b5b284.json b/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Base-Rule/f6223009-028e-4063-90ce-e008a3b5b284.json deleted file mode 100644 index 807348eeb5f9d8054485734b1c12ef499e65abd1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Base-Rule/f6223009-028e-4063-90ce-e008a3b5b284.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Base-Rule/1762652579.910613", - "retrieved_timestamp": "1762652579.910613", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44076273177391545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49023782785253694 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34488541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37450132978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of 
file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Ins-Rule/f75e2bca-e300-4b3c-a5aa-f6aae03e7330.json b/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Ins-Rule/f75e2bca-e300-4b3c-a5aa-f6aae03e7330.json deleted file mode 100644 index 413f80bc64aaa7393211f590bd89cab1bcafd317..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Ins-Rule/f75e2bca-e300-4b3c-a5aa-f6aae03e7330.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Ins-Rule/1762652579.910825", - "retrieved_timestamp": "1762652579.910826", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6222378843690297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5089236146835355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36027190332326287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40463541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34283577127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_Qwen2.5-Math-7B-CFT/07e72fc4-9c37-4a81-a788-8619035c66d3.json b/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_Qwen2.5-Math-7B-CFT/07e72fc4-9c37-4a81-a788-8619035c66d3.json deleted file mode 100644 index 708353ab1fa30aaa6910c0383729fdc503feb459..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_Qwen2.5-Math-7B-CFT/07e72fc4-9c37-4a81-a788-8619035c66d3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TIGER-Lab_Qwen2.5-Math-7B-CFT/1762652579.911227", - "retrieved_timestamp": "1762652579.911228", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TIGER-Lab/Qwen2.5-Math-7B-CFT", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TIGER-Lab/Qwen2.5-Math-7B-CFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2776976200924658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46369414980230833 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38866666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446476063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-r-v0.3/43b106fe-ff02-4cfe-956f-cfc9e272de78.json b/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-r-v0.3/43b106fe-ff02-4cfe-956f-cfc9e272de78.json deleted file mode 100644 index f8ac7564e021e6b92edecebbd956e4764e041782..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-r-v0.3/43b106fe-ff02-4cfe-956f-cfc9e272de78.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-r-v0.3/1762652579.917092", - "retrieved_timestamp": "1762652579.917093", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheTsar1209/qwen-carpmuscle-r-v0.3", - "developer": 
"alibaba", - "inference_platform": "unknown", - "id": "TheTsar1209/qwen-carpmuscle-r-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44550902715904905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6227124007872 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30060422960725075 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42776041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5103058510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.1/ce9658b7-b457-4fb3-8fce-4173b5d93f2d.json b/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.1/ce9658b7-b457-4fb3-8fce-4173b5d93f2d.json deleted file mode 100644 index 68496680371f6775a2878f55f75c866e997e5ff4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.1/ce9658b7-b457-4fb3-8fce-4173b5d93f2d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.1/1762652579.917331", - "retrieved_timestamp": "1762652579.917332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheTsar1209/qwen-carpmuscle-v0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TheTsar1209/qwen-carpmuscle-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5621628390448454 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.643430074129922 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2628398791540785 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41610416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520029920212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.2/eed9909e-db3e-4d6a-8caa-3f208ace941d.json b/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.2/eed9909e-db3e-4d6a-8caa-3f208ace941d.json deleted file mode 100644 index d401bf54d52db192f15bd2dc9684b3eb3751cc77..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.2/eed9909e-db3e-4d6a-8caa-3f208ace941d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.2/1762652579.917543", - "retrieved_timestamp": "1762652579.917544", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheTsar1209/qwen-carpmuscle-v0.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TheTsar1209/qwen-carpmuscle-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5256929391791557 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6386922464145662 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - 
}, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43455208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5147107712765957 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.3/f8aa8470-6803-458e-8207-b217969dd6f3.json b/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.3/f8aa8470-6803-458e-8207-b217969dd6f3.json deleted file mode 100644 index 0083b6ac036520cacc5f5d3e8679d3113a2284a6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.3/f8aa8470-6803-458e-8207-b217969dd6f3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.3/1762652579.917758", - "retrieved_timestamp": "1762652579.917759", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheTsar1209/qwen-carpmuscle-v0.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TheTsar1209/qwen-carpmuscle-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4476322823441801 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6151533941210218 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31344410876132933 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4131875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5061502659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.4.1/c464e6b4-aa76-4b42-ab9b-71f193ec2a57.json b/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.4.1/c464e6b4-aa76-4b42-ab9b-71f193ec2a57.json deleted file mode 100644 index 4f63f0f88a6f4279e24e89f5be8a145de9a0214b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.4.1/c464e6b4-aa76-4b42-ab9b-71f193ec2a57.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.4.1/1762652579.918201", - "retrieved_timestamp": "1762652579.9182022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheTsar1209/qwen-carpmuscle-v0.4.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TheTsar1209/qwen-carpmuscle-v0.4.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7359938297051822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6506533698399672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27794561933534745 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44890625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5191156914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.4/90fe60dc-76dd-4e90-99b4-c16d026afcb5.json b/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.4/90fe60dc-76dd-4e90-99b4-c16d026afcb5.json deleted file mode 100644 index 1310d5effa5f77693c566336a1c70adb91743b9b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.4/90fe60dc-76dd-4e90-99b4-c16d026afcb5.json +++ /dev/null @@ 
-1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.4/1762652579.917984", - "retrieved_timestamp": "1762652579.917985", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheTsar1209/qwen-carpmuscle-v0.4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "TheTsar1209/qwen-carpmuscle-v0.4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7202068289915202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6453667027727318 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.277190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45160416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5143783244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Triangle104_DSR1-Distill-Qwen-7B-RP/856c2575-700c-4b00-8883-bcde8841e262.json b/leaderboard_data/HFOpenLLMv2/alibaba/Triangle104_DSR1-Distill-Qwen-7B-RP/856c2575-700c-4b00-8883-bcde8841e262.json deleted file mode 100644 index 8a390e587151eddd8f31289ae322a827960aa9e7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Triangle104_DSR1-Distill-Qwen-7B-RP/856c2575-700c-4b00-8883-bcde8841e262.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_DSR1-Distill-Qwen-7B-RP/1762652579.923616", - "retrieved_timestamp": "1762652579.923616", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"Triangle104/DSR1-Distill-Qwen-7B-RP", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Triangle104/DSR1-Distill-Qwen-7B-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36092900171544834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4326490703099772 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40454166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30277593085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Weyaxi_Einstein-v7-Qwen2-7B/b20c1304-d782-4d41-9c15-0091f9c914e4.json b/leaderboard_data/HFOpenLLMv2/alibaba/Weyaxi_Einstein-v7-Qwen2-7B/b20c1304-d782-4d41-9c15-0091f9c914e4.json deleted file mode 100644 index 67dad616144b08aace5f4fe6d6d9fb6fdd6e8eab..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Weyaxi_Einstein-v7-Qwen2-7B/b20c1304-d782-4d41-9c15-0091f9c914e4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v7-Qwen2-7B/1762652579.949607", - "retrieved_timestamp": "1762652579.949609", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Weyaxi/Einstein-v7-Qwen2-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Weyaxi/Einstein-v7-Qwen2-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4099633417111043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5161472249498397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19939577039274925 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43997916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4095744680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-7B-MS-Destroyer/c5d4bbfe-68a9-4808-ab2e-e92dd88ba06a.json b/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-7B-MS-Destroyer/c5d4bbfe-68a9-4808-ab2e-e92dd88ba06a.json deleted file mode 100644 index 09e23dbd3297ca4d603d3be0fac90f9b89ef3373..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-7B-MS-Destroyer/c5d4bbfe-68a9-4808-ab2e-e92dd88ba06a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-7B-MS-Destroyer/1762652579.953399", - "retrieved_timestamp": "1762652579.953399", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Xiaojian9992024/Qwen2.5-7B-MS-Destroyer", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Qwen2.5-7B-MS-Destroyer" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7295741964653786 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5469696828400438 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42702083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4412400265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview-v0.2/5cf588ed-fde6-4ee1-833e-a6743cc1834c.json b/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview-v0.2/5cf588ed-fde6-4ee1-833e-a6743cc1834c.json deleted file mode 100644 index 02a76ad5e6a6060ab1c10f7478a09c1bd3c3230c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview-v0.2/5cf588ed-fde6-4ee1-833e-a6743cc1834c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview-v0.2/1762652579.953881", - "retrieved_timestamp": "1762652579.9538822", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6701984068937087 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.537439126573433 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47205438066465255 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4467083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4370844414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview/97a591f9-2052-43b3-851d-ac73c793a000.json b/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview/97a591f9-2052-43b3-851d-ac73c793a000.json deleted file mode 100644 index 847acae1d66e79f1d17c21e4a456c450f52f26d3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview/97a591f9-2052-43b3-851d-ac73c793a000.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview/1762652579.95366", - "retrieved_timestamp": "1762652579.953661", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7640205765147586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5543342320067098 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4879154078549849 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44807291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43758311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Medium-Censored/89ca3fb4-eb53-422c-a4dd-029bd1fc7c37.json b/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Medium-Censored/89ca3fb4-eb53-422c-a4dd-029bd1fc7c37.json 
deleted file mode 100644 index c22414580aa26ad39a71bec6d09348a560585bb2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Medium-Censored/89ca3fb4-eb53-422c-a4dd-029bd1fc7c37.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-THREADRIPPER-Medium-Censored/1762652579.95415", - "retrieved_timestamp": "1762652579.954151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8112064876749248 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6431453053747279 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.533987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.414 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49285239361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/4fcdfdff-87be-47b0-93bb-b4bc0bb2499d.json b/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/4fcdfdff-87be-47b0-93bb-b4bc0bb2499d.json deleted file mode 100644 index 4d7f76714861263c5d3dc262fa0377c0d130ba95..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/4fcdfdff-87be-47b0-93bb-b4bc0bb2499d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/1762652579.954578", - "retrieved_timestamp": 
"1762652579.954578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7403899431286763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5465437953400678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5075528700906344 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38069791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4393284574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small/a55039b6-922f-4732-9feb-fa757f627ebd.json b/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small/a55039b6-922f-4732-9feb-fa757f627ebd.json deleted file mode 100644 index 1ec554961f08ee2fc4014e172f8f3f46c659fd65..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small/a55039b6-922f-4732-9feb-fa757f627ebd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small/1762652579.9543638", - "retrieved_timestamp": "1762652579.954365", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small", - "developer": "alibaba", - "inference_platform": 
"unknown", - "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7689164749531243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5489785469339065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4735649546827795 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43492708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4356715425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Ultra-1.5B-25.02-Exp/ddfae432-5d3c-4c7e-bc7f-087cddea014f.json b/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Ultra-1.5B-25.02-Exp/ddfae432-5d3c-4c7e-bc7f-087cddea014f.json deleted file mode 100644 index 65fe9e05ce68559a9c7c9cc0b2b8a4164453e398..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Ultra-1.5B-25.02-Exp/ddfae432-5d3c-4c7e-bc7f-087cddea014f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-Ultra-1.5B-25.02-Exp/1762652579.954794", - "retrieved_timestamp": "1762652579.9547951", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4073403015111017 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40655813090204523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3383125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26412898936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-1M-YOYO-V3/fdc183ed-50d6-40c3-8e7b-02a37fc42a00.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-1M-YOYO-V3/fdc183ed-50d6-40c3-8e7b-02a37fc42a00.json deleted file mode 100644 index 01495428c32b988c00d142ca6e29335b57c7475d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-1M-YOYO-V3/fdc183ed-50d6-40c3-8e7b-02a37fc42a00.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-1M-YOYO-V3/1762652579.955529", - "retrieved_timestamp": "1762652579.95553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-1M-YOYO-V3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-1M-YOYO-V3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8398327548681941 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6448491305599157 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5354984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.414125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206948138297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0505/1835078d-7897-4517-9d7b-86a2285dfa27.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0505/1835078d-7897-4517-9d7b-86a2285dfa27.json deleted file mode 100644 index 6d02225a49b482cb5afa3020180c939f1c817076..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0505/1835078d-7897-4517-9d7b-86a2285dfa27.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-0505/1762652579.9557781", - "retrieved_timestamp": "1762652579.9557781", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-0505", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-0505" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5882912893345214 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6539239511887702 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4433534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47569791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370678191489362 - } - } - ], - "additional_details": { - "precision": "float16", - 
"architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0510-v2/ad6edd05-e83f-4da3-b200-c1d972548e8b.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0510-v2/ad6edd05-e83f-4da3-b200-c1d972548e8b.json deleted file mode 100644 index e88aa2137c05ca625759ccfd26430402cc1fdd91..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0510-v2/ad6edd05-e83f-4da3-b200-c1d972548e8b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-0510-v2/1762652579.955989", - "retrieved_timestamp": "1762652579.955989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-0510-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-0510-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.594710922574325 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6552826977321495 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44410876132930516 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47439583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5380651595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0805/6d4ac88f-7a02-4f78-9990-6736972f43f7.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0805/6d4ac88f-7a02-4f78-9990-6736972f43f7.json deleted file mode 100644 index b27b7f7d15c1d7757744aa4036abf9ad0a076b86..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0805/6d4ac88f-7a02-4f78-9990-6736972f43f7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ 
- "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-0805/1762652579.956195", - "retrieved_timestamp": "1762652579.956195", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-0805", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-0805" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5882912893345214 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6539239511887702 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4433534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47569791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370678191489362 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1005-v2/ed12a458-8c3b-4e08-a218-e94b4fdd89d8.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1005-v2/ed12a458-8c3b-4e08-a218-e94b4fdd89d8.json deleted file mode 100644 index 705d263ea08ad6bed1e799601d74c0ae3c33940f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1005-v2/ed12a458-8c3b-4e08-a218-e94b4fdd89d8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1005-v2/1762652579.956619", - "retrieved_timestamp": "1762652579.956619", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-1005-v2", - "developer": "alibaba", - 
"inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-1005-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.595310442958018 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6551321410649699 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4433534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4730625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5371509308510638 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1005/29058700-6465-476d-b1c9-2bb89d70c52b.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1005/29058700-6465-476d-b1c9-2bb89d70c52b.json deleted file mode 100644 index 483508307eaead2d207a5c166b5e233d4dcdd5e1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1005/29058700-6465-476d-b1c9-2bb89d70c52b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1005/1762652579.9563992", - "retrieved_timestamp": "1762652579.9564002", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-1005", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-1005" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5971588717935079 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6542059787912534 - 
} - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.452416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47303125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5382313829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010-v2/2047ae80-fdc6-4e94-90e6-b3cac52d8c45.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010-v2/2047ae80-fdc6-4e94-90e6-b3cac52d8c45.json deleted file mode 100644 index df0124e2dfb95426833edd5fc94fb34864668804..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010-v2/2047ae80-fdc6-4e94-90e6-b3cac52d8c45.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1010-v2/1762652579.957223", - "retrieved_timestamp": "1762652579.957223", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-1010-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.594710922574325 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6552826977321495 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44410876132930516 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47439583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5380651595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010/1de35d6f-c62f-48fd-b921-41e85b55434a.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010/1de35d6f-c62f-48fd-b921-41e85b55434a.json deleted file mode 100644 index 4bc2550507f5a48d0660b1d6950c7ec895a04f89..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010/1de35d6f-c62f-48fd-b921-41e85b55434a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1010/1762652579.957045", - "retrieved_timestamp": "1762652579.957045", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-1010", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7904737208384863 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6405986391086301 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4180625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49443151595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff 
--git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010/6a676239-eed6-44dc-b395-1b2453d5b0ba.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010/6a676239-eed6-44dc-b395-1b2453d5b0ba.json deleted file mode 100644 index d967872c543905abe94973f96e3ad37c05f2c7d3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010/6a676239-eed6-44dc-b395-1b2453d5b0ba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1010/1762652579.956832", - "retrieved_timestamp": "1762652579.956832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-1010", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5898648918203699 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6539973096042956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4509063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47439583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5375664893617021 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-SCE/e0545222-4bd1-490a-a315-5b9ce9742310.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-SCE/e0545222-4bd1-490a-a315-5b9ce9742310.json deleted file mode 100644 index aba520b2f6032753ede02403498fc278940fed78..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-SCE/e0545222-4bd1-490a-a315-5b9ce9742310.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-SCE/1762652579.957431", - 
"retrieved_timestamp": "1762652579.957431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-SCE", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-SCE" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5843694729983111 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6489486805510399 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46148036253776437 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47042708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5380651595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4-p1/441375d9-0375-4a15-9d50-267395d3ab13.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4-p1/441375d9-0375-4a15-9d50-267395d3ab13.json deleted file mode 100644 index ba7018f285e7647d7f87ec9cabdb29e4a9c2af49..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4-p1/441375d9-0375-4a15-9d50-267395d3ab13.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-V4-p1/1762652579.957833", - "retrieved_timestamp": "1762652579.957834", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p1" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8203488964835526 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6515535751177631 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41942708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5019946808510638 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4-p2/9ecdd8a3-247b-46b2-ae3b-5798685329ef.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4-p2/9ecdd8a3-247b-46b2-ae3b-5798685329ef.json deleted file mode 100644 index 3410614958480a1d6310c0e93520c28703761873..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4-p2/9ecdd8a3-247b-46b2-ae3b-5798685329ef.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-V4-p2/1762652579.958032", - "retrieved_timestamp": "1762652579.9580328", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8047868544351211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6338919627514907 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5166163141993958 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44345833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49675864361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4/c76d318b-eba5-4407-be86-a92051791f00.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4/c76d318b-eba5-4407-be86-a92051791f00.json deleted file mode 100644 index b093741219937e3aa704026a14a273cec6b8b815..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4/c76d318b-eba5-4407-be86-a92051791f00.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-V4/1762652579.9576309", - "retrieved_timestamp": "1762652579.957632", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-V4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8397828871837835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6490345839036636 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41152083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5169547872340425 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-latest-V2/b97b327c-1730-4bfe-b5fe-00dbfcd0d372.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-latest-V2/b97b327c-1730-4bfe-b5fe-00dbfcd0d372.json deleted file mode 100644 index aea1891d1fadad7042c891a8b2cc67fc0ba8e3dc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-latest-V2/b97b327c-1730-4bfe-b5fe-00dbfcd0d372.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-latest-V2/1762652579.958441", - "retrieved_timestamp": "1762652579.958441", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-latest-V2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-latest-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7771346693440072 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6299023045601466 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5158610271903323 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42993750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5223570478723404 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-latest/d5487f61-9be7-4ffc-af6d-be9f925dd4ba.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-latest/d5487f61-9be7-4ffc-af6d-be9f925dd4ba.json deleted file mode 100644 index a4b69be40001cfa9318485e8ab5e520c6e9722cb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-latest/d5487f61-9be7-4ffc-af6d-be9f925dd4ba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-latest/1762652579.95823", - "retrieved_timestamp": "1762652579.958231", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-YOYO-latest", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-YOYO-latest" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.591063932587756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6656232526900528 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4418429003021148 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.469125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370678191489362 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-it-restore/ab78a98d-0cad-4215-8f37-f3093066a98d.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-it-restore/ab78a98d-0cad-4215-8f37-f3093066a98d.json deleted file mode 100644 index 6ab667c679944d0d966151bcf1110c9205b90ed0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-it-restore/ab78a98d-0cad-4215-8f37-f3093066a98d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-it-restore/1762652579.958646", - 
"retrieved_timestamp": "1762652579.958647", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-14B-it-restore", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-14B-it-restore" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8209484168672456 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6387730309916794 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370090634441088 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40872916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4900265957446808 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-7B-it-restore/2f2577b8-28e3-4fa1-8e65-66e59499b9cd.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-7B-it-restore/2f2577b8-28e3-4fa1-8e65-66e59499b9cd.json deleted file mode 100644 index df9aa3d9429a23f8fc1626065512cf3be3ab93c7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-7B-it-restore/2f2577b8-28e3-4fa1-8e65-66e59499b9cd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-7B-it-restore/1762652579.958842", - "retrieved_timestamp": "1762652579.958842", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-7B-it-restore", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-7B-it-restore" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7530796065550517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5406524352251431 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40069791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42877327127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-Coder-14B-YOYO-1010/4f6bda51-89d3-4005-9133-db6d871ae87d.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-Coder-14B-YOYO-1010/4f6bda51-89d3-4005-9133-db6d871ae87d.json deleted file mode 100644 index 8909b69eae98031b962c12dcfd5c3e5c9f454e2c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-Coder-14B-YOYO-1010/4f6bda51-89d3-4005-9133-db6d871ae87d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-Coder-14B-YOYO-1010/1762652579.9590368", - "retrieved_timestamp": "1762652579.959038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5335864395359867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6186663964199025 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": 
"Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3217522658610272 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4422395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4074966755319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V2/0c7e0639-a082-47f1-bf32-0c45ce573f0a.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V2/0c7e0639-a082-47f1-bf32-0c45ce573f0a.json deleted file mode 100644 index 00317316d1d63101b4b2060f34daa2efc5ae87cf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V2/0c7e0639-a082-47f1-bf32-0c45ce573f0a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V2/1762652579.959567", - "retrieved_timestamp": "1762652579.9595678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5070834275278483 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6452083564140533 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3542296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46890625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5371509308510638 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V3/4f85534a-0b12-42c4-a0d3-06d4d8337e0c.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V3/4f85534a-0b12-42c4-a0d3-06d4d8337e0c.json deleted file mode 100644 index 12f54f45d8aceb80bbea388cc508562eb004c1c1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V3/4f85534a-0b12-42c4-a0d3-06d4d8337e0c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V3/1762652579.959789", - "retrieved_timestamp": "1762652579.959789", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8577928784513978 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6359248665982408 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.527190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4881150265957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V4/f5b253b5-4c42-49f8-9f3f-d85a5b2502c0.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V4/f5b253b5-4c42-49f8-9f3f-d85a5b2502c0.json deleted file mode 100644 index 4a8dbcfe4daee43f0c5dc7fe96bbf6ac1a7ae102..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V4/f5b253b5-4c42-49f8-9f3f-d85a5b2502c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V4/1762652579.959998", - "retrieved_timestamp": "1762652579.959999", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8364605912312664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.651497220848125 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5392749244712991 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44342708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5203623670212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B/2dd14fef-53f5-491d-a5e1-7e19f6043049.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B/2dd14fef-53f5-491d-a5e1-7e19f6043049.json deleted file mode 100644 index 5350741596b01a6b031e48662c40d38ce1ab4a3e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B/2dd14fef-53f5-491d-a5e1-7e19f6043049.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B/1762652579.959276", - 
"retrieved_timestamp": "1762652579.9592772", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.594111402190632 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6644460038734455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.411631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47569791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5350731382978723 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YoungPanda_qwenqwen/7e4c528f-bb42-40e7-b849-86732d2f2a18.json b/leaderboard_data/HFOpenLLMv2/alibaba/YoungPanda_qwenqwen/7e4c528f-bb42-40e7-b849-86732d2f2a18.json deleted file mode 100644 index 70cd583a8b5ef494528a21f61f90487fbcf06610..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/YoungPanda_qwenqwen/7e4c528f-bb42-40e7-b849-86732d2f2a18.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/YoungPanda_qwenqwen/1762652579.964632", - "retrieved_timestamp": "1762652579.964633", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "YoungPanda/qwenqwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "YoungPanda/qwenqwen" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy 
on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12639684924888184 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337898518087465 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34336458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11677194148936171 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 14.316 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen-2.5-Aether-SlerpFusion-7B/8b61e7aa-3ba3-4e25-b1bf-9718970a111a.json b/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen-2.5-Aether-SlerpFusion-7B/8b61e7aa-3ba3-4e25-b1bf-9718970a111a.json deleted file mode 100644 index b25d9ffc3ba36958ae32b26676e4da2bda8fdbcc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen-2.5-Aether-SlerpFusion-7B/8b61e7aa-3ba3-4e25-b1bf-9718970a111a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen-2.5-Aether-SlerpFusion-7B/1762652579.9677062", - "retrieved_timestamp": "1762652579.9677062", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6261597007052399 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5462236205548866 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27341389728096677 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41778125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43267952127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-CelestialHarmony-1M/d912a685-7187-4b56-a7a8-881ed678ae2f.json b/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-CelestialHarmony-1M/d912a685-7187-4b56-a7a8-881ed678ae2f.json deleted file mode 100644 index 48030382ee388ae717c3679d8aec0d7da239fe5a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-CelestialHarmony-1M/d912a685-7187-4b56-a7a8-881ed678ae2f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-CelestialHarmony-1M/1762652579.967964", - "retrieved_timestamp": "1762652579.967965", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5943862285402732 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431374181474681 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4595416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4386635638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-HomerAnvita-NerdMix/500a7a12-9c94-4ed8-b2b4-33473141c3c7.json b/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-HomerAnvita-NerdMix/500a7a12-9c94-4ed8-b2b4-33473141c3c7.json deleted file mode 100644 index e4eb8094e0665070345c8c1d212711e7c959511a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-HomerAnvita-NerdMix/500a7a12-9c94-4ed8-b2b4-33473141c3c7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-HomerAnvita-NerdMix/1762652579.96818", - "retrieved_timestamp": "1762652579.968181", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7707649037886142 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5541319848156986 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38368580060422963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43905208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4431515957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end 
of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-HomerCreative-Mix/336aaa71-3f35-48f3-bede-cb9ab3324cfc.json b/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-HomerCreative-Mix/336aaa71-3f35-48f3-bede-cb9ab3324cfc.json deleted file mode 100644 index 9c55a92b8c9fdd57ac2e90cce0a2ba173521fc2a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-HomerCreative-Mix/336aaa71-3f35-48f3-bede-cb9ab3324cfc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-HomerCreative-Mix/1762652579.968384", - "retrieved_timestamp": "1762652579.968385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7835044348994002 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5548068560095062 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564954682779456 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43495833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4447307180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-Qandora-CySec/7a495a80-f712-477b-bd5c-0cf7a07e8ef2.json b/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-Qandora-CySec/7a495a80-f712-477b-bd5c-0cf7a07e8ef2.json deleted file mode 100644 index 19945da3ff36ddbdeb390c1cc002166d1820653e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-Qandora-CySec/7a495a80-f712-477b-bd5c-0cf7a07e8ef2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-Qandora-CySec/1762652579.968593", - "retrieved_timestamp": "1762652579.9685938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ZeroXClem/Qwen2.5-7B-Qandora-CySec", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ZeroXClem/Qwen2.5-7B-Qandora-CySec" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6773172958860268 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5490022663689288 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2930513595166163 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4484707446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/abacusai_Liberated-Qwen1.5-14B/614f3e27-e150-4edb-9438-06d0b0f38ca3.json b/leaderboard_data/HFOpenLLMv2/alibaba/abacusai_Liberated-Qwen1.5-14B/614f3e27-e150-4edb-9438-06d0b0f38ca3.json deleted file mode 100644 index ee6f1d25906035003c6a7711b3de44fdb8fde4cf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/abacusai_Liberated-Qwen1.5-14B/614f3e27-e150-4edb-9438-06d0b0f38ca3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/abacusai_Liberated-Qwen1.5-14B/1762652579.9698281", - "retrieved_timestamp": "1762652579.9698281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abacusai/Liberated-Qwen1.5-14B", - "developer": "alibaba", - "inference_platform": 
"unknown", - "id": "abacusai/Liberated-Qwen1.5-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36310212458499 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49480009174671863 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35123005319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/adriszmar_QAIMath-Qwen2.5-7B-TIES/457f0bc3-68e1-4ecb-a983-5f504b1246cd.json b/leaderboard_data/HFOpenLLMv2/alibaba/adriszmar_QAIMath-Qwen2.5-7B-TIES/457f0bc3-68e1-4ecb-a983-5f504b1246cd.json deleted file mode 100644 index 29956dc935e42b95316623db16a4a0e4e61b64c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/adriszmar_QAIMath-Qwen2.5-7B-TIES/457f0bc3-68e1-4ecb-a983-5f504b1246cd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/adriszmar_QAIMath-Qwen2.5-7B-TIES/1762652579.975151", - "retrieved_timestamp": "1762652579.975153", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "adriszmar/QAIMath-Qwen2.5-7B-TIES", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "adriszmar/QAIMath-Qwen2.5-7B-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16853725891745014 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31242688274884584 
- } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39629166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10663231382978723 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/adriszmar_QAIMath-Qwen2.5-7B-TIES/78544e05-7eed-465d-9199-35b25e1bebfe.json b/leaderboard_data/HFOpenLLMv2/alibaba/adriszmar_QAIMath-Qwen2.5-7B-TIES/78544e05-7eed-465d-9199-35b25e1bebfe.json deleted file mode 100644 index c5126a5c1dc78a3ec63da3b597f98f6d5f83ed05..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/adriszmar_QAIMath-Qwen2.5-7B-TIES/78544e05-7eed-465d-9199-35b25e1bebfe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/adriszmar_QAIMath-Qwen2.5-7B-TIES/1762652579.9747589", - "retrieved_timestamp": "1762652579.9747598", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "adriszmar/QAIMath-Qwen2.5-7B-TIES", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "adriszmar/QAIMath-Qwen2.5-7B-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.174632198123202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3126379538396578 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40959375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10871010638297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-42B-AGI/de6fe2ab-47de-4616-a0b9-b2cb6f44b16b.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-42B-AGI/de6fe2ab-47de-4616-a0b9-b2cb6f44b16b.json deleted file mode 100644 index 857d3e4c4875a1b5380876196f2219ae6a6f26f9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-42B-AGI/de6fe2ab-47de-4616-a0b9-b2cb6f44b16b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-42B-AGI/1762652579.9983659", - "retrieved_timestamp": "1762652579.998367", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Qwen2.5-42B-AGI", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwen2.5-42B-AGI" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19129354557019818 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2942104150907988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11677194148936171 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 42.516 - } -} \ No 
newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task2/3518e992-9548-4025-a641-99a2cf3833e4.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task2/3518e992-9548-4025-a641-99a2cf3833e4.json deleted file mode 100644 index 7b705af0567fd1470ba073b14732eb7ceb881f24..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task2/3518e992-9548-4025-a641-99a2cf3833e4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task2/1762652579.998622", - "retrieved_timestamp": "1762652579.998623", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Qwen2.5-7B-task2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwen2.5-7B-task2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45270327176336567 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5625940266685543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3549848942598187 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43696874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4517121010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task3/0c556e08-bb71-406c-88b8-d45fc4cc43f0.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task3/0c556e08-bb71-406c-88b8-d45fc4cc43f0.json deleted file mode 100644 index c4fa1605cf962cab28f00b180b789f1771fdb8fc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task3/0c556e08-bb71-406c-88b8-d45fc4cc43f0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/allknowingroger_Qwen2.5-7B-task3/1762652579.998833", - "retrieved_timestamp": "1762652579.998834", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Qwen2.5-7B-task3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwen2.5-7B-task3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.512903540383959 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5397623813486384 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26057401812688824 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43557291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45013297872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task4/a200d34f-8ed0-4f1d-93e2-cff38b1811f9.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task4/a200d34f-8ed0-4f1d-93e2-cff38b1811f9.json deleted file mode 100644 index 7e270f5d2aa98a3912904bc61811a71864c0d9f7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task4/a200d34f-8ed0-4f1d-93e2-cff38b1811f9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task4/1762652579.999042", - "retrieved_timestamp": "1762652579.999042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Qwen2.5-7B-task4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"allknowingroger/Qwen2.5-7B-task4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5005385709916355 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5583446038580263 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43954166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45611702127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task7/b5b02465-0d3f-4ccc-a104-174fcf53dc9a.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task7/b5b02465-0d3f-4ccc-a104-174fcf53dc9a.json deleted file mode 100644 index cfff1e9e2998c037931ba53545ac0a496689dd12..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task7/b5b02465-0d3f-4ccc-a104-174fcf53dc9a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task7/1762652579.999242", - "retrieved_timestamp": "1762652579.999243", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Qwen2.5-7B-task7", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwen2.5-7B-task7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42842325030917966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.555243179835915 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4133144946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task8/956640e9-97a3-4641-9ed0-a63831a8ee58.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task8/956640e9-97a3-4641-9ed0-a63831a8ee58.json deleted file mode 100644 index 8944fce29e71fee315e5340a5750e549c727fb2d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task8/956640e9-97a3-4641-9ed0-a63831a8ee58.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task8/1762652579.9994612", - "retrieved_timestamp": "1762652579.999462", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Qwen2.5-7B-task8", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwen2.5-7B-task8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4645185884564068 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5524895381578828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45144791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44331781914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-slerp-14B/ba80d36c-7688-40e8-8182-251c6b9e6b19.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-slerp-14B/ba80d36c-7688-40e8-8182-251c6b9e6b19.json deleted file mode 100644 index 767c315318dfd708169142115b4ebd5d3e5e2666..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-slerp-14B/ba80d36c-7688-40e8-8182-251c6b9e6b19.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-slerp-14B/1762652579.999685", - "retrieved_timestamp": "1762652579.999686", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Qwen2.5-slerp-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwen2.5-slerp-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49282016161562425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.65124197415124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4622356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47439583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5378989361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 
14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp12-7B/18c67de4-1518-44b6-b92f-b490e9d55877.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp12-7B/18c67de4-1518-44b6-b92f-b490e9d55877.json deleted file mode 100644 index 4f2abde9964a5eaa0425835e0fea82540b093663..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp12-7B/18c67de4-1518-44b6-b92f-b490e9d55877.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp12-7B/1762652579.999902", - "retrieved_timestamp": "1762652579.999903", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/QwenSlerp12-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/QwenSlerp12-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5075577246151324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5556448443090559 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45947916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4460605053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp4-14B/1393cab1-31aa-470c-bca1-53f99d7ea1e8.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp4-14B/1393cab1-31aa-470c-bca1-53f99d7ea1e8.json deleted file mode 100644 index ea61568c4befc2d79d95dae578ed2d2d7ee9a9c3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp4-14B/1393cab1-31aa-470c-bca1-53f99d7ea1e8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/allknowingroger_QwenSlerp4-14B/1762652580.000124", - "retrieved_timestamp": "1762652580.000125", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/QwenSlerp4-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/QwenSlerp4-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6327544249258634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6483250205703057 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3693353474320242 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724832214765101 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46496875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5435505319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp5-14B/da7928ec-55b8-4d4b-9b9e-b40c5de7136b.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp5-14B/da7928ec-55b8-4d4b-9b9e-b40c5de7136b.json deleted file mode 100644 index f8881ba9715e893a12486fc9abd66d2e662126ab..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp5-14B/da7928ec-55b8-4d4b-9b9e-b40c5de7136b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp5-14B/1762652580.000389", - "retrieved_timestamp": "1762652580.0003898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/QwenSlerp5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"allknowingroger/QwenSlerp5-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7119387669162267 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6356573710010681 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564954682779456 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4675416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5390625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp6-14B/5135513f-f255-412b-ab16-f0d613e4525e.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp6-14B/5135513f-f255-412b-ab16-f0d613e4525e.json deleted file mode 100644 index fa75519e2a20690138410732f12ed2a21b7c392e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp6-14B/5135513f-f255-412b-ab16-f0d613e4525e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp6-14B/1762652580.0006049", - "retrieved_timestamp": "1762652580.000606", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/QwenSlerp6-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/QwenSlerp6-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6866846633598851 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6384454358065165 - } - }, - { - "evaluation_name": 
"MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723564954682779 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46896875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5405585106382979 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock1-14B/95c86ae6-dcb7-4ed7-a82d-ce0b374cca0e.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock1-14B/95c86ae6-dcb7-4ed7-a82d-ce0b374cca0e.json deleted file mode 100644 index 9f9cd83f52fde25f8fa78d2fae984cf4ac788f21..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock1-14B/95c86ae6-dcb7-4ed7-a82d-ce0b374cca0e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_QwenStock1-14B/1762652580.0008268", - "retrieved_timestamp": "1762652580.0008278", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/QwenStock1-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/QwenStock1-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5634117474966422 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6528491305599156 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47296875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418051861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock2-14B/4a4c258b-2b03-4fad-a5e0-b623a25fb735.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock2-14B/4a4c258b-2b03-4fad-a5e0-b623a25fb735.json deleted file mode 100644 index e1b39207a1a0f3b76e0d04124c2c17866fbde40e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock2-14B/4a4c258b-2b03-4fad-a5e0-b623a25fb735.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_QwenStock2-14B/1762652580.001041", - "retrieved_timestamp": "1762652580.001042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/QwenStock2-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/QwenStock2-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5563427261887348 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.656885010139055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47560416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5405585106382979 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock3-14B/2b3928ad-ab69-4e63-aa3c-e64dea7b5e6c.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock3-14B/2b3928ad-ab69-4e63-aa3c-e64dea7b5e6c.json deleted file mode 100644 index 25a231beb09fb7de10575252a8567dd12efa7ef0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock3-14B/2b3928ad-ab69-4e63-aa3c-e64dea7b5e6c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_QwenStock3-14B/1762652580.0012438", - "retrieved_timestamp": "1762652580.001245", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/QwenStock3-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/QwenStock3-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5615134509767417 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6565322062808641 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776435045317221 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4755729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5428025265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp2-14B/636ed71e-3d86-4d5d-8b8d-3019f26261fc.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp2-14B/636ed71e-3d86-4d5d-8b8d-3019f26261fc.json deleted file mode 100644 index a3efa01640491d6360bb1e311f26de17f534943c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp2-14B/636ed71e-3d86-4d5d-8b8d-3019f26261fc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp2-14B/1762652580.001452", - 
"retrieved_timestamp": "1762652580.0014532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Qwenslerp2-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwenslerp2-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5007136619724553 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6554876216007552 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4729375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5403091755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp2-7B/a1e6f539-f5d7-4f57-b0da-4df7e5a86240.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp2-7B/a1e6f539-f5d7-4f57-b0da-4df7e5a86240.json deleted file mode 100644 index 87145601383c5c14956347e4dde724e4ec7d140f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp2-7B/a1e6f539-f5d7-4f57-b0da-4df7e5a86240.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp2-7B/1762652580.001649", - "retrieved_timestamp": "1762652580.0016499", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Qwenslerp2-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwenslerp2-7B" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294396645345462 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5609127334788001 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3421450151057402 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4356041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4515458776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp3-14B/06a2a807-3dbc-42c4-adec-4d6caa01cf74.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp3-14B/06a2a807-3dbc-42c4-adec-4d6caa01cf74.json deleted file mode 100644 index 2a087c5084f8b6f2248acdf94c0e670ef85d14f8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp3-14B/06a2a807-3dbc-42c4-adec-4d6caa01cf74.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp3-14B/1762652580.001856", - "retrieved_timestamp": "1762652580.001856", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Qwenslerp3-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwenslerp3-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5052349986923584 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6520835120117142 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44637462235649544 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46760416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5394780585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp3-7B/88727af1-7672-4ab5-9cc4-f56d286f3967.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp3-7B/88727af1-7672-4ab5-9cc4-f56d286f3967.json deleted file mode 100644 index cc6eb877f8217f1f5217dce8f4d15477c7c09c7f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp3-7B/88727af1-7672-4ab5-9cc4-f56d286f3967.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp3-7B/1762652580.0020611", - "retrieved_timestamp": "1762652580.002062", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Qwenslerp3-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Qwenslerp3-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.501837347127843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5580160200086862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3217522658610272 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.45151041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45420545212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Rombos-LLM-V2.5-Qwen-42b/619fde94-d095-4f5c-b36d-19a38b6a8109.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Rombos-LLM-V2.5-Qwen-42b/619fde94-d095-4f5c-b36d-19a38b6a8109.json deleted file mode 100644 index ef86715ffa6154129fc30d6c9522e4fad3118e3f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Rombos-LLM-V2.5-Qwen-42b/619fde94-d095-4f5c-b36d-19a38b6a8109.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Rombos-LLM-V2.5-Qwen-42b/1762652580.002683", - "retrieved_timestamp": "1762652580.002683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Rombos-LLM-V2.5-Qwen-42b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "allknowingroger/Rombos-LLM-V2.5-Qwen-42b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1879213819332704 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2969164076001621 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36333333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11677194148936171 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 42.516 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Blunt/d75b9105-a60d-49d9-8606-7b23ff5d3d1a.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Blunt/d75b9105-a60d-49d9-8606-7b23ff5d3d1a.json deleted file mode 100644 index 0cbe3bfeded6d2ff3c2cbd00d7870449e93f8d53..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Blunt/d75b9105-a60d-49d9-8606-7b23ff5d3d1a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Blunt/1762652580.03596", - "retrieved_timestamp": "1762652580.0359628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.261136008014291 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27743669901671336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35952083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11835106382978723 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Reflective/40933520-61e0-4cbe-b6b2-b4d19063a1b9.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Reflective/40933520-61e0-4cbe-b6b2-b4d19063a1b9.json deleted file mode 100644 index 21a23904e15d84bb78da95fe352beb7912267319..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Reflective/40933520-61e0-4cbe-b6b2-b4d19063a1b9.json +++ 
/dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Reflective/1762652580.0363572", - "retrieved_timestamp": "1762652580.0363579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30327641768285923 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2908444769655102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16314199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33555208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11303191489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/46a36382-df06-4dc1-93ae-6ae61343a969.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/46a36382-df06-4dc1-93ae-6ae61343a969.json deleted file mode 100644 index 1365fab3ddf472888c0ec84ac6b922cdaad9ef67..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/46a36382-df06-4dc1-93ae-6ae61343a969.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/1762652580.036823", - "retrieved_timestamp": "1762652580.036824", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging 
Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3751922676276723 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4926903187457697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5015105740181269 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4220625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42428523936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/269f307e-3af1-47a2-92ec-00a59b4725ac.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/269f307e-3af1-47a2-92ec-00a59b4725ac.json deleted file mode 100644 index 4e17e6d84613df2c669d2cf0b0f1e9d95288f1a5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/269f307e-3af1-47a2-92ec-00a59b4725ac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/1762652580.03794", - "retrieved_timestamp": "1762652580.037941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.554044380022784 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337106084887115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4247604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15043218085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/244417b6-88a2-483f-adba-c1d944c9cc29.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/244417b6-88a2-483f-adba-c1d944c9cc29.json deleted file mode 100644 index 869bb2c52915094c2cf55874de50d7cdddca568f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/244417b6-88a2-483f-adba-c1d944c9cc29.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/1762652580.037686", - "retrieved_timestamp": "1762652580.037687", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5221456845614081 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3198581755956472 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25075528700906347 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4526979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14835438829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1bf5eb2a-c0e2-4bfc-9ae1-ec5737974cbe.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1bf5eb2a-c0e2-4bfc-9ae1-ec5737974cbe.json deleted file mode 100644 index ea8005b0c5ced8bcd0fc636eb07c9e0ebc51be91..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1bf5eb2a-c0e2-4bfc-9ae1-ec5737974cbe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1762652580.038195", - "retrieved_timestamp": "1762652580.038196", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5139274901705253 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3013444769655102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1472809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44333333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12890625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/41186ba2-77da-496c-afd0-c0f11ea05c9b.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/41186ba2-77da-496c-afd0-c0f11ea05c9b.json deleted file mode 100644 index 1ecb347630a29013eb8886a577dd9221ea7d7e54..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/41186ba2-77da-496c-afd0-c0f11ea05c9b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/1762652580.037415", - "retrieved_timestamp": "1762652580.037416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5421791956453321 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3170339746824052 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16314199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486979166666667 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14311835106382978 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt/407adfd5-6a1f-420a-a5de-2e37740d7025.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt/407adfd5-6a1f-420a-a5de-2e37740d7025.json deleted file mode 100644 index f2256bd73bc8601ffa50369688ecc3b27cd591b7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt/407adfd5-6a1f-420a-a5de-2e37740d7025.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt/1762652580.0370848", - "retrieved_timestamp": "1762652580.037087", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5611632690151022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32828968244496226 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45542708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14469747340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Reflective/744cef52-b155-4bb0-9411-2eb47938b5d6.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Reflective/744cef52-b155-4bb0-9411-2eb47938b5d6.json deleted file mode 100644 index 4466383a22364781a26b5c640fbad5acc36b3dad..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Reflective/744cef52-b155-4bb0-9411-2eb47938b5d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Reflective/1762652580.038453", - "retrieved_timestamp": "1762652580.038454", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4290227706928727 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.301225755504323 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4553958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11294880319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B/f269f0cb-4f9b-4f29-84c2-a4f31ff08290.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B/f269f0cb-4f9b-4f29-84c2-a4f31ff08290.json deleted file mode 100644 index 843f11416535adfb30bf2e39f3639a91416a0647..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B/f269f0cb-4f9b-4f29-84c2-a4f31ff08290.json +++ /dev/null @@ -1,107 
+0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B/1762652580.036597", - "retrieved_timestamp": "1762652580.036598", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4171575863154209 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30329653176003074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4487916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-Blunt/678a08d8-3089-4d97-879d-c5485344de05.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-Blunt/678a08d8-3089-4d97-879d-c5485344de05.json deleted file mode 100644 index b12e2691375ed5d885d36fe761347e93ad526989..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-Blunt/678a08d8-3089-4d97-879d-c5485344de05.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B-Blunt/1762652580.03893", - "retrieved_timestamp": "1762652580.038931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - 
"name": "braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4266246891581005 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29017781029884354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38851041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11693816489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/9c8db160-fc92-473f-a766-fb00fc099f6e.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/9c8db160-fc92-473f-a766-fb00fc099f6e.json deleted file mode 100644 index 34eefcbdfbce3e2f8c65a150cbdf8f9dbc8fc517..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/9c8db160-fc92-473f-a766-fb00fc099f6e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/1762652580.03921", - "retrieved_timestamp": "1762652580.039211", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3654503384353515 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2958444769655102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17371601208459214 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38460416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11328125 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-Reflective/fd05a73b-5b6a-460e-85d5-547710ab6bac.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-Reflective/fd05a73b-5b6a-460e-85d5-547710ab6bac.json deleted file mode 100644 index 8a96518e55de4ff3ac74653ac4b2fcedf6be5db0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-Reflective/fd05a73b-5b6a-460e-85d5-547710ab6bac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B-Reflective/1762652580.039571", - "retrieved_timestamp": "1762652580.039572", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921783091087204 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2906778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38999999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1155252659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B/b4c9ec76-b126-4715-b3cf-c0d8a8a61d44.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B/b4c9ec76-b126-4715-b3cf-c0d8a8a61d44.json deleted file mode 100644 index e117f9ff4c2ea51d761f4de972308e8172bafc42..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B/b4c9ec76-b126-4715-b3cf-c0d8a8a61d44.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B/1762652580.0386932", - "retrieved_timestamp": "1762652580.038694", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/DeepSeek-R1-Distill-Qwen-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/DeepSeek-R1-Distill-Qwen-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39679938119744496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2886778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37666666666666665 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1141123670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_Qwen2.5-14B/7be8016c-2454-4228-b10d-badba12e845b.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_Qwen2.5-14B/7be8016c-2454-4228-b10d-badba12e845b.json deleted file mode 100644 index 48a07267d43e64f1684adebfe1dbf76ecf2d1552..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_Qwen2.5-14B/7be8016c-2454-4228-b10d-badba12e845b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_Qwen2.5-14B/1762652580.039853", - "retrieved_timestamp": "1762652580.039854", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/Qwen2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "braindao/Qwen2.5-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.540854931581537 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5852660409288039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29229607250755285 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41235416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48836436170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_CyberCore-Qwen-2.1-7B/131132b7-5b2a-421f-aa02-360ef9b7f206.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_CyberCore-Qwen-2.1-7B/131132b7-5b2a-421f-aa02-360ef9b7f206.json deleted 
file mode 100644 index df995b7d0c80d3ba6b455b749ee77294ee57fb48..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_CyberCore-Qwen-2.1-7B/131132b7-5b2a-421f-aa02-360ef9b7f206.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_CyberCore-Qwen-2.1-7B/1762652580.0426219", - "retrieved_timestamp": "1762652580.042623", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/CyberCore-Qwen-2.1-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/CyberCore-Qwen-2.1-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5765757080103016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5572089082936126 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35876132930513593 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4444813829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_DeepQwen-3B-LCoT-SCE/49243e70-a24d-4e0c-b4c6-4275be1db944.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_DeepQwen-3B-LCoT-SCE/49243e70-a24d-4e0c-b4c6-4275be1db944.json deleted file mode 100644 index 16eea3303e2a18dcb0c0168830434cf44724be40..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_DeepQwen-3B-LCoT-SCE/49243e70-a24d-4e0c-b4c6-4275be1db944.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_DeepQwen-3B-LCoT-SCE/1762652580.042877", - "retrieved_timestamp": "1762652580.042878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/DeepQwen-3B-LCoT-SCE", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/DeepQwen-3B-LCoT-SCE" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4489809261647983 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45123121380305237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24697885196374622 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35139583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289561170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.396 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/7e6a55fb-da39-4b16-a59b-70635e636c02.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/7e6a55fb-da39-4b16-a59b-70635e636c02.json deleted file mode 100644 index f99a6b123dca7fda861da200c31636d4d5e7b4a9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/7e6a55fb-da39-4b16-a59b-70635e636c02.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/1762652580.043099", - "retrieved_timestamp": "1762652580.043099", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39010492160800014 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3494110718041537 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16540785498489427 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3663125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2508311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_FwF-Qwen-7B-0.1/bfaeefb1-93c9-470b-9376-9c67a1d20862.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_FwF-Qwen-7B-0.1/bfaeefb1-93c9-470b-9376-9c67a1d20862.json deleted file mode 100644 index 8dd4a817c4683229b58118c5695d2a2b912b886b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_FwF-Qwen-7B-0.1/bfaeefb1-93c9-470b-9376-9c67a1d20862.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_FwF-Qwen-7B-0.1/1762652580.04422", - "retrieved_timestamp": "1762652580.044221", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/FwF-Qwen-7B-0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/FwF-Qwen-7B-0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30045390674521383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5019272523147252 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2764350453172205 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39520833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4060837765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_FwF-Qwen-7B-0.2/ee7b9254-5e4a-46a0-a8b3-2ecc1708e6ab.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_FwF-Qwen-7B-0.2/ee7b9254-5e4a-46a0-a8b3-2ecc1708e6ab.json deleted file mode 100644 index 2b01dac53def0e561ca31dda9696290712fade2c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_FwF-Qwen-7B-0.2/ee7b9254-5e4a-46a0-a8b3-2ecc1708e6ab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_FwF-Qwen-7B-0.2/1762652580.044472", - "retrieved_timestamp": "1762652580.0444732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/FwF-Qwen-7B-0.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/FwF-Qwen-7B-0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44790710869382133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5596406929346521 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42178125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4382480053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Sky-T1/33cc8f90-d019-49d9-8220-d66260659435.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Sky-T1/33cc8f90-d019-49d9-8220-d66260659435.json deleted file mode 100644 index 5f4e2ab6188d210adca72f2d28c8de416a65b80e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Sky-T1/33cc8f90-d019-49d9-8220-d66260659435.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Sky-T1/1762652580.0542989", - "retrieved_timestamp": "1762652580.0542998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7B-Deep-Sky-T1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7B-Deep-Sky-T1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42080457630198986 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4139878251775055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40181249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2103557180851064 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v1/a9fe98a7-e143-4100-99cd-adea90917c4c.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v1/a9fe98a7-e143-4100-99cd-adea90917c4c.json deleted file mode 100644 
index 9cdf30ea72d68c035abc2db55c27881279084ab9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v1/a9fe98a7-e143-4100-99cd-adea90917c4c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Stock-v1/1762652580.054558", - "retrieved_timestamp": "1762652580.054559", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7B-Deep-Stock-v1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5695066867023941 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5361336083539997 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26435045317220546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4108958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40658244680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v4/56ae78dc-3cae-43b0-afc9-e6fac3c6556a.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v4/56ae78dc-3cae-43b0-afc9-e6fac3c6556a.json deleted file mode 100644 index 6992447860d2571bb69f6bd8ba2d8717cedfbefc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v4/56ae78dc-3cae-43b0-afc9-e6fac3c6556a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Stock-v4/1762652580.054795", - "retrieved_timestamp": "1762652580.054796", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": 
"HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7B-Deep-Stock-v4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7752862405085175 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5452765042799131 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48942598187311176 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41269791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4341755319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v5/39ce157b-e374-4963-8b40-6393835574f5.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v5/39ce157b-e374-4963-8b40-6393835574f5.json deleted file mode 100644 index 7ad5aeb50ef2a46218e0cd2641bf2e34ed77366a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v5/39ce157b-e374-4963-8b40-6393835574f5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Stock-v5/1762652580.05501", - "retrieved_timestamp": "1762652580.055011", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7B-Deep-Stock-v5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.45090471061228654 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4672461238794705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1472809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28316156914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Exp-Sce/c57286a9-ee0c-48e7-814e-8f2aa8e9688a.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Exp-Sce/c57286a9-ee0c-48e7-814e-8f2aa8e9688a.json deleted file mode 100644 index f9c862b49f75cf29ab48490a31eb3b26bb9e997e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Exp-Sce/c57286a9-ee0c-48e7-814e-8f2aa8e9688a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Exp-Sce/1762652580.055233", - "retrieved_timestamp": "1762652580.055233", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7B-Exp-Sce", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7B-Exp-Sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.765169749597734 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5505865059891896 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3255287009063444 - } 
- }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44302083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42586436170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-R1-Stock/672e66ed-80e2-4b45-b52c-d9265f8efac8.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-R1-Stock/672e66ed-80e2-4b45-b52c-d9265f8efac8.json deleted file mode 100644 index 19496e84cbba9d2a97588ca21e4be2732d1f35a1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-R1-Stock/672e66ed-80e2-4b45-b52c-d9265f8efac8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-R1-Stock/1762652580.055454", - "retrieved_timestamp": "1762652580.055455", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7B-R1-Stock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7B-R1-Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7573261169253137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5393363105747148 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5007552870090635 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3993645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.429438164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Stock-Deep-Bespoke/af89079b-b84e-48f1-876a-ebf2d933d91e.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Stock-Deep-Bespoke/af89079b-b84e-48f1-876a-ebf2d933d91e.json deleted file mode 100644 index d55ec55db8e1b11733786110aaa5fc561af8b469..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Stock-Deep-Bespoke/af89079b-b84e-48f1-876a-ebf2d933d91e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Stock-Deep-Bespoke/1762652580.0556722", - "retrieved_timestamp": "1762652580.0556731", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206219497599702 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49203477801491813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4068020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3579621010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7b-S1k/e7394d5d-4253-4a53-8a0a-73b0a41e62a4.json 
b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7b-S1k/e7394d5d-4253-4a53-8a0a-73b0a41e62a4.json deleted file mode 100644 index c319bd14319d74becc9887f4faea8f0c043fc38e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7b-S1k/e7394d5d-4253-4a53-8a0a-73b0a41e62a4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7b-S1k/1762652580.055886", - "retrieved_timestamp": "1762652580.0558872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen-2.5-7b-S1k", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen-2.5-7b-S1k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7162351449708995 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5562750208035135 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4780966767371601 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4071458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4382480053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-1.5B-Model-Stock/865ffa1b-af08-416e-8de0-a16091d4ec79.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-1.5B-Model-Stock/865ffa1b-af08-416e-8de0-a16091d4ec79.json deleted file mode 100644 index 129f7c18bc153cabd468cad714e88d3a830e6efe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-1.5B-Model-Stock/865ffa1b-af08-416e-8de0-a16091d4ec79.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-1.5B-Model-Stock/1762652580.0561001", - "retrieved_timestamp": "1762652580.056101", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-1.5B-Model-Stock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-1.5B-Model-Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18292574812608325 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2873695911207613 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11003989361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.776 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v2/e949a47b-85f9-4072-8302-8bfef92579d9.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v2/e949a47b-85f9-4072-8302-8bfef92579d9.json deleted file mode 100644 index eb8637a397ad930f11d201afa1d2db7ecfdf6953..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v2/e949a47b-85f9-4072-8302-8bfef92579d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v2/1762652580.0565188", - "retrieved_timestamp": "1762652580.05652", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-Model-Stock-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-3B-Model-Stock-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6490157227268093 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46774789186946836 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3867069486404834 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3269614361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.396 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v3.1/744d1978-7aa3-44b6-91a0-664383a66f8b.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v3.1/744d1978-7aa3-44b6-91a0-664383a66f8b.json deleted file mode 100644 index 1a83383b0b5b2ecef0c89a45ad31e81df029dc9b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v3.1/744d1978-7aa3-44b6-91a0-664383a66f8b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v3.1/1762652580.056732", - "retrieved_timestamp": "1762652580.056733", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-Model-Stock-v3.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-3B-Model-Stock-v3.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6480915083090644 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.473722298403459 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38972809667673713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39679166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289561170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.396 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v3.2/139f2e38-0b98-4bfe-82b0-99a6e6b51e7f.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v3.2/139f2e38-0b98-4bfe-82b0-99a6e6b51e7f.json deleted file mode 100644 index 3e81e3a01f029a6135ad2f3ff081df814554b0bc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v3.2/139f2e38-0b98-4bfe-82b0-99a6e6b51e7f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v3.2/1762652580.05695", - "retrieved_timestamp": "1762652580.05695", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-Model-Stock-v3.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-3B-Model-Stock-v3.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6353021095138676 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4727417689283166 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3293716755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.396 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v4.1/8348f83b-0739-411f-8b87-bd9d5e871ab3.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v4.1/8348f83b-0739-411f-8b87-bd9d5e871ab3.json deleted file mode 100644 index e0772b17df5523525f9bb8ec6c3915a98dcdd64e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v4.1/8348f83b-0739-411f-8b87-bd9d5e871ab3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v4.1/1762652580.0571678", - "retrieved_timestamp": "1762652580.057169", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-Model-Stock-v4.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-3B-Model-Stock-v4.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6380747527671025 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48202557906199406 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39409374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3386801861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.396 - } -} \ No 
newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock/4dcf1412-4182-40bd-bd1a-2246e29f18e9.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock/4dcf1412-4182-40bd-bd1a-2246e29f18e9.json deleted file mode 100644 index e18648a7ee05b411e962f40f3fdefa31be4ccc3f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock/4dcf1412-4182-40bd-bd1a-2246e29f18e9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock/1762652580.056308", - "retrieved_timestamp": "1762652580.056309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-Model-Stock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-3B-Model-Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6380747527671025 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4712481909242632 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37990936555891236 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3249667553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.396 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Mix/f43b9387-56a9-4c21-850c-5cfda84fc8b5.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Mix/f43b9387-56a9-4c21-850c-5cfda84fc8b5.json deleted file mode 100644 index 04cc00cc83a37c08c1a6fe2d4b9d2d3f1f58e6b8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Mix/f43b9387-56a9-4c21-850c-5cfda84fc8b5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/bunnycore_Qwen2.5-3B-RP-Mix/1762652580.057388", - "retrieved_timestamp": "1762652580.057389", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-RP-Mix", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-3B-RP-Mix" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5720543712903984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4894378989397821 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42844791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37275598404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Thinker-V2/497c8c15-1b77-4468-b33d-efa190c28e78.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Thinker-V2/497c8c15-1b77-4468-b33d-efa190c28e78.json deleted file mode 100644 index 6292da827616a3fce9407e2756848aa88db02294..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Thinker-V2/497c8c15-1b77-4468-b33d-efa190c28e78.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-RP-Thinker-V2/1762652580.057826", - "retrieved_timestamp": "1762652580.057826", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-RP-Thinker-V2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"bunnycore/Qwen2.5-3B-RP-Thinker-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6419965691033125 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46784408133522204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.398125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271276595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Thinker/80cadd5b-ebbd-4f2f-912b-5d944650e2b1.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Thinker/80cadd5b-ebbd-4f2f-912b-5d944650e2b1.json deleted file mode 100644 index 282778bc6c093d7bc178b412504316ad6d5063e2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Thinker/80cadd5b-ebbd-4f2f-912b-5d944650e2b1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-RP-Thinker/1762652580.0576031", - "retrieved_timestamp": "1762652580.057604", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-3B-RP-Thinker", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-3B-RP-Thinker" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.589414974489909 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4164134011392067 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33534743202416917 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3287291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3149933510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-CyberRombos/1dc11c68-ce65-4a5b-9f75-4cdf1775bfc6.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-CyberRombos/1dc11c68-ce65-4a5b-9f75-4cdf1775bfc6.json deleted file mode 100644 index 7a74aea2361963adc22a4e81d6eb684f6f8351ae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-CyberRombos/1dc11c68-ce65-4a5b-9f75-4cdf1775bfc6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-CyberRombos/1762652580.058041", - "retrieved_timestamp": "1762652580.058042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-CyberRombos", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-CyberRombos" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.751830698103255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5464960546716063 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41254166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4390791223404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-Fuse-Exp/f435a5b0-cc12-4603-b7b0-4625dc547ed2.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-Fuse-Exp/f435a5b0-cc12-4603-b7b0-4625dc547ed2.json deleted file mode 100644 index ea0480ad6d619906b4f56e4c61796d8c754772c4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-Fuse-Exp/f435a5b0-cc12-4603-b7b0-4625dc547ed2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Fuse-Exp/1762652580.0583198", - "retrieved_timestamp": "1762652580.058321", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-Fuse-Exp", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-Fuse-Exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5468501354184675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5108680600425207 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31419939577039274 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45728125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3308676861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No 
newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-MixStock-Sce-V0.3/daf38e27-1149-44a8-84f2-93f842f4740a.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-MixStock-Sce-V0.3/daf38e27-1149-44a8-84f2-93f842f4740a.json deleted file mode 100644 index ecd979093487bc003df30e813e427bd0216b1eaa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-MixStock-Sce-V0.3/daf38e27-1149-44a8-84f2-93f842f4740a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-MixStock-Sce-V0.3/1762652580.058998", - "retrieved_timestamp": "1762652580.058999", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21197644472222593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3479005166788895 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25755287009063443 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17794215425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-MixStock-V0.1/4a5bb50c-017d-421d-8ea1-21a8316db0f4.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-MixStock-V0.1/4a5bb50c-017d-421d-8ea1-21a8316db0f4.json deleted file mode 100644 index be0fc7e4eccb0970dd63a9cbcae1c17ad01a61e2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-MixStock-V0.1/4a5bb50c-017d-421d-8ea1-21a8316db0f4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-MixStock-V0.1/1762652580.059214", - "retrieved_timestamp": "1762652580.059214", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-MixStock-V0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-MixStock-V0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7673428724672757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5479100568012056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31722054380664655 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.441625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4256150265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-R1-Bespoke-Stock/20de3a0f-fad0-4832-863e-2b2049037c4f.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-R1-Bespoke-Stock/20de3a0f-fad0-4832-863e-2b2049037c4f.json deleted file mode 100644 index a5318692a54a166d3e148d45875c7d6796734c83..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-R1-Bespoke-Stock/20de3a0f-fad0-4832-863e-2b2049037c4f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-R1-Bespoke-Stock/1762652580.059437", - "retrieved_timestamp": "1762652580.059438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-R1-Bespoke-Stock", - "developer": 
"alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-R1-Bespoke-Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3726445830396681 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48221362910675625 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20468277945619334 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3926354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34715757978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-R1-Bespoke-Task/0f460b31-7249-4e2d-a614-d1230e95f3cf.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-R1-Bespoke-Task/0f460b31-7249-4e2d-a614-d1230e95f3cf.json deleted file mode 100644 index 9f10c73ee4f071ec1c6f69ee3e4295d908ec9767..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-R1-Bespoke-Task/0f460b31-7249-4e2d-a614-d1230e95f3cf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-R1-Bespoke-Task/1762652580.059654", - "retrieved_timestamp": "1762652580.059655", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-R1-Bespoke-Task", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-R1-Bespoke-Task" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3786641666334215 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.41495531490332715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3568854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2687832446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-1M-Thinker/1879a765-f4ab-4bad-9525-47f428b43220.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-1M-Thinker/1879a765-f4ab-4bad-9525-47f428b43220.json deleted file mode 100644 index e142670a403e6eb0b0775792dfbce152efd1eb67..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-1M-Thinker/1879a765-f4ab-4bad-9525-47f428b43220.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-RRP-1M-Thinker/1762652580.060085", - "retrieved_timestamp": "1762652580.060086", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-RRP-1M-Thinker", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-RRP-1M-Thinker" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23081091503876383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3481907488085136 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2719033232628399 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1768617021276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-1M/9ec2ac0c-21e8-4c9c-ba5f-69ad284400bb.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-1M/9ec2ac0c-21e8-4c9c-ba5f-69ad284400bb.json deleted file mode 100644 index 963d4487554c1a4d4a32885cf484a82a98ca19c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-1M/9ec2ac0c-21e8-4c9c-ba5f-69ad284400bb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-RRP-1M/1762652580.059867", - "retrieved_timestamp": "1762652580.0598679", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-RRP-1M", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-RRP-1M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7481338404322753 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.545239229980545 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44826041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4266123670212766 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-ID/85b10038-d136-4be7-8e04-7298ddb4f7d2.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-ID/85b10038-d136-4be7-8e04-7298ddb4f7d2.json deleted file mode 100644 index 79f113ef74eb61f9b3d8ff92e18fa72c63bc3acf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-ID/85b10038-d136-4be7-8e04-7298ddb4f7d2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-RRP-ID/1762652580.0603101", - "retrieved_timestamp": "1762652580.0603101", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-RRP-ID", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-RRP-ID" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.747259493698941 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5479543512061099 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.486404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-Sky-R1-Mini/c1f39d51-d7a2-4fee-ba35-ef4e0d429b29.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-Sky-R1-Mini/c1f39d51-d7a2-4fee-ba35-ef4e0d429b29.json deleted file mode 100644 index de1beac6b8a631af7bd1ab913fa159e11878f341..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-Sky-R1-Mini/c1f39d51-d7a2-4fee-ba35-ef4e0d429b29.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Sky-R1-Mini/1762652580.061045", - "retrieved_timestamp": "1762652580.0610461", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-Sky-R1-Mini", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-Sky-R1-Mini" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23048622100471194 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3502939195575525 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3448229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12533244680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_QwenMosaic-7B/4fcee29d-6351-4875-995d-81834fd878c3.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_QwenMosaic-7B/4fcee29d-6351-4875-995d-81834fd878c3.json deleted file mode 100644 index 22a4354edf52dafa4e012ca3523652c058790605..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_QwenMosaic-7B/4fcee29d-6351-4875-995d-81834fd878c3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_QwenMosaic-7B/1762652580.061329", - "retrieved_timestamp": "1762652580.0613298", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/QwenMosaic-7B", - "developer": "alibaba", - "inference_platform": "unknown", - 
"id": "bunnycore/QwenMosaic-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5819215237791282 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5564132127895585 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44410876132930516 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4163854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43101728723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_Dolphin3.0-Qwen2.5-0.5B/4b0c69d9-1801-4a54-9554-d8dcff88f9a3.json b/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_Dolphin3.0-Qwen2.5-0.5B/4b0c69d9-1801-4a54-9554-d8dcff88f9a3.json deleted file mode 100644 index fe5a50f4c643570d8e6b9baed399c9f16fe54a25..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_Dolphin3.0-Qwen2.5-0.5B/4b0c69d9-1801-4a54-9554-d8dcff88f9a3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-Qwen2.5-0.5B/1762652580.112457", - "retrieved_timestamp": "1762652580.112458", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4697136930012367 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.31142229157184026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35545833333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14128989361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_dolphin-2.9.2-qwen2-72b/5d3c9637-0558-4a2e-9950-8e7017d013f8.json b/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_dolphin-2.9.2-qwen2-72b/5d3c9637-0558-4a2e-9950-8e7017d013f8.json deleted file mode 100644 index 9f7d4d86618dc5f62b2d6022d377233c36ae7264..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_dolphin-2.9.2-qwen2-72b/5d3c9637-0558-4a2e-9950-8e7017d013f8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-qwen2-72b/1762652580.114711", - "retrieved_timestamp": "1762652580.114712", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.2-qwen2-72b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.2-qwen2-72b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6343778950961227 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6296364939584073 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802114803625378 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45207291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.547124335106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_dolphin-2.9.2-qwen2-7b/c04e8c21-3ae1-457a-9609-682341323a88.json b/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_dolphin-2.9.2-qwen2-7b/c04e8c21-3ae1-457a-9609-682341323a88.json deleted file mode 100644 index 5e6e0f4ff3657ab9c6d9e20aeb2614aae2393aec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_dolphin-2.9.2-qwen2-7b/c04e8c21-3ae1-457a-9609-682341323a88.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-qwen2-7b/1762652580.114933", - "retrieved_timestamp": "1762652580.114934", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.2-qwen2-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.2-qwen2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3534599307614906 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48938263759195594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41914583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4050864361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B/d38f0e3a-e89e-4af6-95b2-8230b6a84ec3.json b/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B/d38f0e3a-e89e-4af6-95b2-8230b6a84ec3.json deleted file mode 100644 index 93d29e7a1833b523445df042cf28e00cc718cc8e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B/d38f0e3a-e89e-4af6-95b2-8230b6a84ec3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B/1762652580.121964", - "retrieved_timestamp": "1762652580.1219652", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34634104176917246 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32409879947333436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36345833333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11868351063829788 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-14B/77e70ef3-fef2-4b75-9221-b165ec29f31e.json 
b/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-14B/77e70ef3-fef2-4b75-9221-b165ec29f31e.json deleted file mode 100644 index e2cbf872e9c6af5480423dacd091abf9187da799..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-14B/77e70ef3-fef2-4b75-9221-b165ec29f31e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-14B/1762652580.122241", - "retrieved_timestamp": "1762652580.122248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43816517950150047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5905573130283358 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.536625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4666722074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-32B/6731c6b8-0b23-4fc2-b284-01025ce30887.json b/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-32B/6731c6b8-0b23-4fc2-b284-01025ce30887.json deleted file mode 100644 index 637e2740b5f2b97a72e2848d0dfe8ac8cda29932..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-32B/6731c6b8-0b23-4fc2-b284-01025ce30887.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-32B/1762652580.12255", - "retrieved_timestamp": 
"1762652580.1225522", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186314534324481 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41969150892898055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4526041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46866688829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-7B/4cb8eae2-bc55-4adb-a4eb-1fc9eb29d891.json b/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-7B/4cb8eae2-bc55-4adb-a4eb-1fc9eb29d891.json deleted file mode 100644 index f00702a37c1674bcb649807ab720fd4b6967e510..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-7B/4cb8eae2-bc55-4adb-a4eb-1fc9eb29d891.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-7B/1762652580.1228092", - "retrieved_timestamp": "1762652580.1228101", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40376866713653103 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34425676981862185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36628124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2321309840425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/dfurman_Qwen2-72B-Orpo-v0.1/b197728d-b390-45a8-8adc-ed8567b628da.json b/leaderboard_data/HFOpenLLMv2/alibaba/dfurman_Qwen2-72B-Orpo-v0.1/b197728d-b390-45a8-8adc-ed8567b628da.json deleted file mode 100644 index b70ac13d10d3470ece4d247b1e980ad7c5279c51..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/dfurman_Qwen2-72B-Orpo-v0.1/b197728d-b390-45a8-8adc-ed8567b628da.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dfurman_Qwen2-72B-Orpo-v0.1/1762652580.125584", - "retrieved_timestamp": "1762652580.1255848", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dfurman/Qwen2-72B-Orpo-v0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "dfurman/Qwen2-72B-Orpo-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7879759039348928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6969024790545039 - } - }, - { - "evaluation_name": 
"MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40558912386706947 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47842708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5454621010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.699 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_QwenQwen2.5-7B-IT-Dare/09deb823-536f-4afc-95bf-ebb0a8eb2e00.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_QwenQwen2.5-7B-IT-Dare/09deb823-536f-4afc-95bf-ebb0a8eb2e00.json deleted file mode 100644 index bdfaf1c971d40d3656cae492ecb8525395cc67e3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_QwenQwen2.5-7B-IT-Dare/09deb823-536f-4afc-95bf-ebb0a8eb2e00.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_QwenQwen2.5-7B-IT-Dare/1762652580.1400871", - "retrieved_timestamp": "1762652580.140088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/QwenQwen2.5-7B-IT-Dare", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/QwenQwen2.5-7B-IT-Dare" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7509064836855099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5397962708415814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5090634441087614 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4033645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4289394946808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_QwenQwen2.5-7B-IT/30f8faa5-777f-47bc-b128-f31b950079a3.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_QwenQwen2.5-7B-IT/30f8faa5-777f-47bc-b128-f31b950079a3.json deleted file mode 100644 index 4cfdd36270c2ea3ad86ab86c75e59714d0438ef8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_QwenQwen2.5-7B-IT/30f8faa5-777f-47bc-b128-f31b950079a3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_QwenQwen2.5-7B-IT/1762652580.1398232", - "retrieved_timestamp": "1762652580.1398232", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/QwenQwen2.5-7B-IT", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/QwenQwen2.5-7B-IT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.751830698103255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5397962708415814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5090634441087614 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4033645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4289394946808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline 
at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_RQwen-v0.1/93187c79-f1a4-45f9-9d95-a254a185f7a4.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_RQwen-v0.1/93187c79-f1a4-45f9-9d95-a254a185f7a4.json deleted file mode 100644 index 625dbebd0b7fe8837652d2f6739dad1f28787379..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_RQwen-v0.1/93187c79-f1a4-45f9-9d95-a254a185f7a4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_RQwen-v0.1/1762652580.140311", - "retrieved_timestamp": "1762652580.140312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/RQwen-v0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/RQwen-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7624968417133207 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6446435015804635 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4645015105740181 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41390625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5201961436170213 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_RQwen-v0.2/69318100-73ee-47f4-96b2-6e7b310fbcd1.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_RQwen-v0.2/69318100-73ee-47f4-96b2-6e7b310fbcd1.json deleted file mode 100644 index 2670e8a91b5043629ca61561934e9aaa6a29ee9f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_RQwen-v0.2/69318100-73ee-47f4-96b2-6e7b310fbcd1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_RQwen-v0.2/1762652580.140525", - "retrieved_timestamp": "1762652580.140526", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/RQwen-v0.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/RQwen-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7503568309862276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6426888858891955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3270392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.515874335106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_coolqwen-3b-it/5aab957b-f25b-4208-9bf8-2d16887245bc.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_coolqwen-3b-it/5aab957b-f25b-4208-9bf8-2d16887245bc.json deleted file mode 100644 index f94dc3c97caba3b9479d65fa4f8cbcb805fd7567..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_coolqwen-3b-it/5aab957b-f25b-4208-9bf8-2d16887245bc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_coolqwen-3b-it/1762652580.140961", - "retrieved_timestamp": "1762652580.1409621", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/coolqwen-3b-it", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/coolqwen-3b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6472670292601409 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.485089343991756 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41251041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3601230053191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_frqwen2.5-from7b-duable4layers-it/b2c0f0f2-3c1d-4b2a-a82d-24001cbfd3d7.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_frqwen2.5-from7b-duable4layers-it/b2c0f0f2-3c1d-4b2a-a82d-24001cbfd3d7.json deleted file mode 100644 index 65550aa5fb3aca921ec511324e1d976981097637..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_frqwen2.5-from7b-duable4layers-it/b2c0f0f2-3c1d-4b2a-a82d-24001cbfd3d7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_frqwen2.5-from7b-duable4layers-it/1762652580.1428769", - "retrieved_timestamp": "1762652580.1428769", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/frqwen2.5-from7b-duable4layers-it", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/frqwen2.5-from7b-duable4layers-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7728881589737453 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5263561044354216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4509063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4165729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4126496010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 8.545 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_frqwen2.5-from7b-it/26034d5d-5d52-40d8-aa9b-e90dbd255903.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_frqwen2.5-from7b-it/26034d5d-5d52-40d8-aa9b-e90dbd255903.json deleted file mode 100644 index cd5eca5a41f2ecbb8e19c2bcf30afd2659eb2f75..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_frqwen2.5-from7b-it/26034d5d-5d52-40d8-aa9b-e90dbd255903.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_frqwen2.5-from7b-it/1762652580.143308", - "retrieved_timestamp": "1762652580.143309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/frqwen2.5-from7b-it", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/frqwen2.5-from7b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6532123654126606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5142906815349029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29229607250755285 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.4085729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3976894946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 13.206 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_qwen2.5-test-32b-it/606d699f-c7ac-4e5b-b5a3-5bd43f0a3ff6.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_qwen2.5-test-32b-it/606d699f-c7ac-4e5b-b5a3-5bd43f0a3ff6.json deleted file mode 100644 index a8925d02eacf9c35943f9b9cd34bf9c2756d5e50..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_qwen2.5-test-32b-it/606d699f-c7ac-4e5b-b5a3-5bd43f0a3ff6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_qwen2.5-test-32b-it/1762652580.144918", - "retrieved_timestamp": "1762652580.1449192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/qwen2.5-test-32b-it", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/qwen2.5-test-32b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7889499860370484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.708059329453303 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5974320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640939597315436 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4578125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5765458776595744 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_qwen2.5-with-lora-think-3b-it/6c40f966-753b-4301-8c9b-f7b4905c0b68.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_qwen2.5-with-lora-think-3b-it/6c40f966-753b-4301-8c9b-f7b4905c0b68.json deleted file mode 100644 index 9698b20c31b14503f491361491149db9a1287b70..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_qwen2.5-with-lora-think-3b-it/6c40f966-753b-4301-8c9b-f7b4905c0b68.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_qwen2.5-with-lora-think-3b-it/1762652580.1451252", - "retrieved_timestamp": "1762652580.1451259", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/qwen2.5-with-lora-think-3b-it", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "ehristoforu/qwen2.5-with-lora-think-3b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5319374814381397 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4686847308109022 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.236404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43095833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3402593085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/freewheelin_free-evo-qwen72b-v0.8-re/cfb071af-7283-4155-8ce1-40f751dd46ec.json b/leaderboard_data/HFOpenLLMv2/alibaba/freewheelin_free-evo-qwen72b-v0.8-re/cfb071af-7283-4155-8ce1-40f751dd46ec.json deleted file mode 100644 index 8067752eb832631b2f66ba47ccb828667c001c5e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/freewheelin_free-evo-qwen72b-v0.8-re/cfb071af-7283-4155-8ce1-40f751dd46ec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/freewheelin_free-evo-qwen72b-v0.8-re/1762652580.161332", - "retrieved_timestamp": "1762652580.161333", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "freewheelin/free-evo-qwen72b-v0.8-re", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "freewheelin/free-evo-qwen72b-v0.8-re" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.533086654521115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6127477065378042 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4871666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4870345744680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 72.288 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_2500_qwen/84ad6756-cb9d-4303-8e7a-395c1dc7c222.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_2500_qwen/84ad6756-cb9d-4303-8e7a-395c1dc7c222.json deleted file mode 100644 index a49c77f8136a8bb6ad3cc61a47420ac274ea775e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_2500_qwen/84ad6756-cb9d-4303-8e7a-395c1dc7c222.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_2500_qwen/1762652580.170526", - "retrieved_timestamp": "1762652580.170526", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/ifd_2500_qwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"godlikehhd/ifd_2500_qwen" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33647388928044253 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42983047351897224 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36146875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2921376329787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_correct_all_sample_2500_qwen/b481d1bd-e678-4b78-aecb-d43a561dd969.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_correct_all_sample_2500_qwen/b481d1bd-e678-4b78-aecb-d43a561dd969.json deleted file mode 100644 index 24a6d9d8a858e4815fe4863a0e47b3377faf028a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_correct_all_sample_2500_qwen/b481d1bd-e678-4b78-aecb-d43a561dd969.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_new_correct_all_sample_2500_qwen/1762652580.170775", - "retrieved_timestamp": "1762652580.1707761", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/ifd_new_correct_all_sample_2500_qwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "godlikehhd/ifd_new_correct_all_sample_2500_qwen" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33757319467900726 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.4019641175400575 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3561666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2888962765957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_correct_sample_2500_qwen/c42196be-c20b-413d-8870-f10759058098.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_correct_sample_2500_qwen/c42196be-c20b-413d-8870-f10759058098.json deleted file mode 100644 index 7c36ac9107d51690a2f9ed99b0370cd17fc69986..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_correct_sample_2500_qwen/c42196be-c20b-413d-8870-f10759058098.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_new_correct_sample_2500_qwen/1762652580.170979", - "retrieved_timestamp": "1762652580.1709802", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/ifd_new_correct_sample_2500_qwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "godlikehhd/ifd_new_correct_sample_2500_qwen" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33974631754854895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41103125849665423 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3078859060402685 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3626770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.293218085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_qwen_2500/8d8663a1-12f6-4e88-af3d-784ff86e8c59.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_qwen_2500/8d8663a1-12f6-4e88-af3d-784ff86e8c59.json deleted file mode 100644 index 48951a77cf0eb5eab6fd36df79ab6815222b1f72..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_qwen_2500/8d8663a1-12f6-4e88-af3d-784ff86e8c59.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_new_qwen_2500/1762652580.171179", - "retrieved_timestamp": "1762652580.17118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/ifd_new_qwen_2500", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "godlikehhd/ifd_new_qwen_2500" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.323959316834887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41598162527775745 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3589583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29105718085106386 - 
} - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen-2.5-1.5b-cherry/a0621e6d-4178-49c9-aa2b-f56930884b82.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen-2.5-1.5b-cherry/a0621e6d-4178-49c9-aa2b-f56930884b82.json deleted file mode 100644 index ebfaec91a1369e9b9ff7e245767eeb79b224067f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen-2.5-1.5b-cherry/a0621e6d-4178-49c9-aa2b-f56930884b82.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_qwen-2.5-1.5b-cherry/1762652580.1715672", - "retrieved_timestamp": "1762652580.1715689", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/qwen-2.5-1.5b-cherry", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "godlikehhd/qwen-2.5-1.5b-cherry" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28933784580468713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40357573315752204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.345625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29230385638297873 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.772 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_2.5-1.5b-cherry_new/dd0260dd-59f7-4b3d-8f9c-60b297c07a1b.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_2.5-1.5b-cherry_new/dd0260dd-59f7-4b3d-8f9c-60b297c07a1b.json deleted file mode 100644 index fabac2360fa6eb3fdc9d6c5699fd1de19c89db65..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_2.5-1.5b-cherry_new/dd0260dd-59f7-4b3d-8f9c-60b297c07a1b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_qwen_2.5-1.5b-cherry_new/1762652580.171904", - "retrieved_timestamp": "1762652580.171905", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/qwen_2.5-1.5b-cherry_new", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "godlikehhd/qwen_2.5-1.5b-cherry_new" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3120442647730245 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4149628386006759 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34959375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28939494680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_full_data_alpaca/746630a6-de1d-4976-9168-d8ff06980904.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_full_data_alpaca/746630a6-de1d-4976-9168-d8ff06980904.json deleted file mode 100644 index 51da10fab34343229069852b18bb03c4b97707d1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_full_data_alpaca/746630a6-de1d-4976-9168-d8ff06980904.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_qwen_full_data_alpaca/1762652580.1721501", - "retrieved_timestamp": "1762652580.172151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/qwen_full_data_alpaca", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "godlikehhd/qwen_full_data_alpaca" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3136178672588731 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4229212208733662 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40515625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28507313829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_ins_ans_2500/7f577380-2691-4906-af13-8ca3011e6316.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_ins_ans_2500/7f577380-2691-4906-af13-8ca3011e6316.json deleted file mode 100644 index f1e005c53b94115aa00dd1ed66d4e3b342e5adc4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_ins_ans_2500/7f577380-2691-4906-af13-8ca3011e6316.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_qwen_ins_ans_2500/1762652580.172384", - "retrieved_timestamp": "1762652580.172385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/qwen_ins_ans_2500", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "godlikehhd/qwen_ins_ans_2500" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2698041197356348 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4073950292977672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3588645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28091755319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.1/9b6c775b-ef08-4e57-8441-52d7887615b1.json b/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.1/9b6c775b-ef08-4e57-8441-52d7887615b1.json deleted file mode 100644 index 7eb8c63e7ba64d88a3227618e5ffffaa59cccecf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.1/9b6c775b-ef08-4e57-8441-52d7887615b1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.1/1762652580.187419", - "retrieved_timestamp": "1762652580.18742", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gz987/qwen2.5-7b-cabs-v0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "gz987/qwen2.5-7b-cabs-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7505817896514582 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5481580818735207 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.479607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.437625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4405751329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.2/7288fa97-efd7-45d5-8769-e0071e9b5488.json b/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.2/7288fa97-efd7-45d5-8769-e0071e9b5488.json deleted file mode 100644 index 14c53ba6c6a626b75db28b795274ed10453fb397..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.2/7288fa97-efd7-45d5-8769-e0071e9b5488.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.2/1762652580.18783", - "retrieved_timestamp": "1762652580.187832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gz987/qwen2.5-7b-cabs-v0.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "gz987/qwen2.5-7b-cabs-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7417640748768822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5516262466675281 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4901812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44286458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43974401595744683 - } - } 
- ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.3/b664e033-1424-431e-af8d-09a11b449286.json b/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.3/b664e033-1424-431e-af8d-09a11b449286.json deleted file mode 100644 index 0f7cec49a57a093b1665645a55e32386e7a2ae7d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.3/b664e033-1424-431e-af8d-09a11b449286.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.3/1762652580.188173", - "retrieved_timestamp": "1762652580.188174", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gz987/qwen2.5-7b-cabs-v0.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "gz987/qwen2.5-7b-cabs-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7569515552068511 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5494465314719504 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.493202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44295833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4401595744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.4/8fb7a2aa-3f43-4aaf-b2c0-1770704fcf81.json b/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.4/8fb7a2aa-3f43-4aaf-b2c0-1770704fcf81.json deleted file mode 100644 index a2cc4c57270b3a31ef02c545a9d863239fe7c750..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.4/8fb7a2aa-3f43-4aaf-b2c0-1770704fcf81.json +++ /dev/null @@ 
-1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.4/1762652580.188425", - "retrieved_timestamp": "1762652580.188426", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gz987/qwen2.5-7b-cabs-v0.4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "gz987/qwen2.5-7b-cabs-v0.4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7582503313430586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5524401094760039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48489425981873113 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44295833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395777925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_Deepseek-qwen-modelstock-2B/15a4291f-4918-43a6-b242-90db88fe4a3d.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_Deepseek-qwen-modelstock-2B/15a4291f-4918-43a6-b242-90db88fe4a3d.json deleted file mode 100644 index df645e8155e23b3c53627cce422d93e986276900..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_Deepseek-qwen-modelstock-2B/15a4291f-4918-43a6-b242-90db88fe4a3d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_Deepseek-qwen-modelstock-2B/1762652580.1914759", - "retrieved_timestamp": "1762652580.191477", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"hotmailuser/Deepseek-qwen-modelstock-2B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/Deepseek-qwen-modelstock-2B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21487431127186973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3549242330959277 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34745833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19107380319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_Qwen2.5-HomerSlerp-7B/9c7dab43-b26d-4cb4-a73c-95bb1e01ffe8.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_Qwen2.5-HomerSlerp-7B/9c7dab43-b26d-4cb4-a73c-95bb1e01ffe8.json deleted file mode 100644 index 8eea96b895e8dbc6e0dd0cab533fa74a01096398..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_Qwen2.5-HomerSlerp-7B/9c7dab43-b26d-4cb4-a73c-95bb1e01ffe8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_Qwen2.5-HomerSlerp-7B/1762652580.1961112", - "retrieved_timestamp": "1762652580.1961112", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/Qwen2.5-HomerSlerp-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/Qwen2.5-HomerSlerp-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44878145542715553 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5632506117591088 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33157099697885195 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4383333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4548703457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenModelStock-1.8B/661b1590-f312-447b-a494-1d37ffd93cae.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenModelStock-1.8B/661b1590-f312-447b-a494-1d37ffd93cae.json deleted file mode 100644 index 870b138eeae863e485170b1eaf789462aaded947..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenModelStock-1.8B/661b1590-f312-447b-a494-1d37ffd93cae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenModelStock-1.8B/1762652580.196316", - "retrieved_timestamp": "1762652580.196316", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/QwenModelStock-1.8B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenModelStock-1.8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263075306852484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41881762650909504 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4359166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2958776595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-14B/83387977-a8cd-4cdd-abc7-301006380458.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-14B/83387977-a8cd-4cdd-abc7-301006380458.json deleted file mode 100644 index 28b5379c58f1d60ea2ac3c797ceab6bd85dab5f6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-14B/83387977-a8cd-4cdd-abc7-301006380458.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp-14B/1762652580.1965241", - "retrieved_timestamp": "1762652580.196525", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/QwenSlerp-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenSlerp-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7024716640735471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6491286917834284 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38368580060422963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4634479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399767287234043 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-3B/7f53fb66-2c19-434a-acec-7cdcf9fce04d.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-3B/7f53fb66-2c19-434a-acec-7cdcf9fce04d.json deleted file mode 100644 index 091bbb3373fbc99fd6d09aad35a30a7ddb5204a1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-3B/7f53fb66-2c19-434a-acec-7cdcf9fce04d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp-3B/1762652580.1967301", - "retrieved_timestamp": "1762652580.1967309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/QwenSlerp-3B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenSlerp-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4333690164319561 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4892345530653528 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27492447129909364 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43166666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3693484042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-7B/4f8db3ee-409a-4bac-ab0a-ee3493d1e842.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-7B/4f8db3ee-409a-4bac-ab0a-ee3493d1e842.json deleted file mode 100644 index 9cdf5880fe0b91072c0ad749f9c8f47ada26e797..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-7B/4f8db3ee-409a-4bac-ab0a-ee3493d1e842.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp-7B/1762652580.197109", - "retrieved_timestamp": "1762652580.19711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/QwenSlerp-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenSlerp-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4672912317096415 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5636352508232924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34441087613293053 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4409375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45088098404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp2-14B/6732a278-0613-40fd-bdbc-88a586631279.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp2-14B/6732a278-0613-40fd-bdbc-88a586631279.json deleted file mode 100644 index c3cae08f9e6b9f9c677bc8f9896151fdd1e0e9b0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp2-14B/6732a278-0613-40fd-bdbc-88a586631279.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp2-14B/1762652580.197355", - "retrieved_timestamp": "1762652580.197356", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/QwenSlerp2-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenSlerp2-14B" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7036707048409332 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6492799322983842 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48065625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5378989361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp2-3B/cc53c4f9-3c1b-4b21-9aac-ea22dced76c3.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp2-3B/cc53c4f9-3c1b-4b21-9aac-ea22dced76c3.json deleted file mode 100644 index c9091ed860e447461d502a639d2a8fdac701aa42..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp2-3B/cc53c4f9-3c1b-4b21-9aac-ea22dced76c3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp2-3B/1762652580.197566", - "retrieved_timestamp": "1762652580.197566", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/QwenSlerp2-3B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenSlerp2-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4280486885907171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4801760257099328 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26057401812688824 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4251875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3741688829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp3-14B/7d2c1ffb-d1e7-4c88-af08-74642ddd8741.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp3-14B/7d2c1ffb-d1e7-4c88-af08-74642ddd8741.json deleted file mode 100644 index 909fe07ae69a651579550ca6223e35634bbdad29..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp3-14B/7d2c1ffb-d1e7-4c88-af08-74642ddd8741.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp3-14B/1762652580.197938", - "retrieved_timestamp": "1762652580.1979399", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/QwenSlerp3-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenSlerp3-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6632291209546226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6266526215170748 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43051359516616317 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36661073825503354 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.48078125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5262632978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSparse-7B/96bbc2c8-bb74-408d-8625-e6bf66b63cd0.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSparse-7B/96bbc2c8-bb74-408d-8625-e6bf66b63cd0.json deleted file mode 100644 index f6b52f75593fc8edebc2f6e439a9bc128aadb0b6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSparse-7B/96bbc2c8-bb74-408d-8625-e6bf66b63cd0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSparse-7B/1762652580.198252", - "retrieved_timestamp": "1762652580.198254", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/QwenSparse-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenSparse-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10858632871891026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28956619468137906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35622916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11220079787234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock-0.5B/72853b4d-cc12-478f-b6f4-977b8fbabfa0.json 
b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock-0.5B/72853b4d-cc12-478f-b6f4-977b8fbabfa0.json deleted file mode 100644 index 31ab8f73038dcae2b4acf85f2e97b8793977020d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock-0.5B/72853b4d-cc12-478f-b6f4-977b8fbabfa0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenStock-0.5B/1762652580.198598", - "retrieved_timestamp": "1762652580.1985989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/QwenStock-0.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenStock-0.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20490742341431845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11668882978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock-1.7B/25674b98-92b5-4e2d-97ab-084eabb13db2.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock-1.7B/25674b98-92b5-4e2d-97ab-084eabb13db2.json deleted file mode 100644 index ab39ec2ea3e9b667c565149e95e0cb2a4e3e4518..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock-1.7B/25674b98-92b5-4e2d-97ab-084eabb13db2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenStock-1.7B/1762652580.1988428", - "retrieved_timestamp": "1762652580.198844", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/QwenStock-1.7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenStock-1.7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32141163224688274 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187550547805281 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44121875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2954621010638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock1-14B/67fd0572-cf55-412d-8ec6-0cb168d3ed08.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock1-14B/67fd0572-cf55-412d-8ec6-0cb168d3ed08.json deleted file mode 100644 index 602e47ce0561bb622571e94180db7495a1480f13..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock1-14B/67fd0572-cf55-412d-8ec6-0cb168d3ed08.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_QwenStock1-14B/1762652580.1990862", - "retrieved_timestamp": "1762652580.1990871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/QwenStock1-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "hotmailuser/QwenStock1-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.6693240601603745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6502248812491821 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37009063444108764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47811458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5416389627659575 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/huihui-ai_DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/69d04754-3779-4408-9aa9-68c9ba65de7a.json b/leaderboard_data/HFOpenLLMv2/alibaba/huihui-ai_DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/69d04754-3779-4408-9aa9-68c9ba65de7a.json deleted file mode 100644 index 442eb032543ac3ccfb3f94f60850f690f7507c7b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/huihui-ai_DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/69d04754-3779-4408-9aa9-68c9ba65de7a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/huihui-ai_DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/1762652580.200386", - "retrieved_timestamp": "1762652580.200386", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42112927033604175 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34869240677927044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47006250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19148936170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jayasuryajsk_Qwen2.5-3B-reasoner/91c0e116-7dc0-4931-ac61-b98bac2af3e0.json b/leaderboard_data/HFOpenLLMv2/alibaba/jayasuryajsk_Qwen2.5-3B-reasoner/91c0e116-7dc0-4931-ac61-b98bac2af3e0.json deleted file mode 100644 index c196f2d7a4c11f38e67f9eacfca5191aeda2cea5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jayasuryajsk_Qwen2.5-3B-reasoner/91c0e116-7dc0-4931-ac61-b98bac2af3e0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jayasuryajsk_Qwen2.5-3B-reasoner/1762652580.280263", - "retrieved_timestamp": "1762652580.280264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jayasuryajsk/Qwen2.5-3B-reasoner", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jayasuryajsk/Qwen2.5-3B-reasoner" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4159585455480348 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46511772991620703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.41229166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3482380319148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeanmichela_o-distil-qwen/172e7bfa-b430-4e14-a15a-a54ec5c9133e.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeanmichela_o-distil-qwen/172e7bfa-b430-4e14-a15a-a54ec5c9133e.json deleted file mode 100644 index 3ca4d79d6660a195db49a785a2338354193bda31..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeanmichela_o-distil-qwen/172e7bfa-b430-4e14-a15a-a54ec5c9133e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeanmichela_o-distil-qwen/1762652580.280534", - "retrieved_timestamp": "1762652580.280535", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeanmichela/o-distil-qwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeanmichela/o-distil-qwen" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44823180272787316 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5900367438200601 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5339895833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46575797872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jebish7_qwen2.5-0.5B-IHA-Hin/5849d742-02eb-4370-8c97-efc5eec4f1ed.json 
b/leaderboard_data/HFOpenLLMv2/alibaba/jebish7_qwen2.5-0.5B-IHA-Hin/5849d742-02eb-4370-8c97-efc5eec4f1ed.json deleted file mode 100644 index 8df8381b887406c5462ff6d3234559f6ab77ad71..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jebish7_qwen2.5-0.5B-IHA-Hin/5849d742-02eb-4370-8c97-efc5eec4f1ed.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jebish7_qwen2.5-0.5B-IHA-Hin/1762652580.28294", - "retrieved_timestamp": "1762652580.28294", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jebish7/qwen2.5-0.5B-IHA-Hin", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jebish7/qwen2.5-0.5B-IHA-Hin" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14163419726326149 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29891753632624085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34748958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.109375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen-7B-nerd-uncensored-v1.0/1812829e-2c91-410e-9e2e-cc758b652e9b.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen-7B-nerd-uncensored-v1.0/1812829e-2c91-410e-9e2e-cc758b652e9b.json deleted file mode 100644 index bbbeea6d25a8592d636c087ab1d6a6d34388a6e0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen-7B-nerd-uncensored-v1.0/1812829e-2c91-410e-9e2e-cc758b652e9b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen-7B-nerd-uncensored-v1.0/1762652580.283215", - "retrieved_timestamp": "1762652580.2832158", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen-7B-nerd-uncensored-v1.0", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen-7B-nerd-uncensored-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6135952605752737 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5421083753999172 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47929166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4362533244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-minperplexity-2/593d3d30-f2e8-4ad3-b0ab-4bfed63a0ab5.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-minperplexity-2/593d3d30-f2e8-4ad3-b0ab-4bfed63a0ab5.json deleted file mode 100644 index c875f594a33d3d4ea42910af473501c50e5b30d0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-minperplexity-2/593d3d30-f2e8-4ad3-b0ab-4bfed63a0ab5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-minperplexity-2/1762652580.28349", - "retrieved_timestamp": "1762652580.2834911", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-minperplexity-2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-minperplexity-2" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.509730847484674 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.552390586276348 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3013595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46245833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4345910904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v0.9/45a72c39-9cdb-4fb6-aaf0-d50cc89dfd70.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v0.9/45a72c39-9cdb-4fb6-aaf0-d50cc89dfd70.json deleted file mode 100644 index 188fc3baa8d1a003e56ebf71ce20f2359256267e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v0.9/45a72c39-9cdb-4fb6-aaf0-d50cc89dfd70.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v0.9/1762652580.2837172", - "retrieved_timestamp": "1762652580.2837179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6048274134851084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5469701834138724 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48198958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4363364361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.0/ee2b789c-951d-426e-87e3-232c07d65ade.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.0/ee2b789c-951d-426e-87e3-232c07d65ade.json deleted file mode 100644 index 74c906439e869957317ff481df18a7550bc9e6b8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.0/ee2b789c-951d-426e-87e3-232c07d65ade.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.0/1762652580.283937", - "retrieved_timestamp": "1762652580.283938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7695159953368174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.541762771903226 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47129909365558914 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4551145833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4253656914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.1/2316b408-c94b-471e-b64b-c1f8f345868e.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.1/2316b408-c94b-471e-b64b-c1f8f345868e.json deleted file mode 100644 index 50ee1a5bb8c717afa67e85bc286361a640144e27..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.1/2316b408-c94b-471e-b64b-c1f8f345868e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.1/1762652580.2841558", - "retrieved_timestamp": "1762652580.284157", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6626296005709296 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48640249867140106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38429166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3849734042553192 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.2/49d47f6d-0d11-4b07-b42e-b94310c97d3e.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.2/49d47f6d-0d11-4b07-b42e-b94310c97d3e.json deleted file mode 100644 index 2cc7daf840bac4200f672b27696f9f68add28502..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.2/49d47f6d-0d11-4b07-b42e-b94310c97d3e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.2/1762652580.284375", - "retrieved_timestamp": "1762652580.284375", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49646715160219335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.494592979290867 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41724999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3968583776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.3/0ec990b0-b908-44f5-9fb7-5ee603737bc7.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.3/0ec990b0-b908-44f5-9fb7-5ee603737bc7.json deleted file mode 100644 index 76032093d657af947693785aa13273b00b8e962d..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.3/0ec990b0-b908-44f5-9fb7-5ee603737bc7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.3/1762652580.284589", - "retrieved_timestamp": "1762652580.284589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49951462120506923 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5026055485090198 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41873958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4015957446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.4/34c33a97-ae07-42e9-8025-9076e2bce3bb.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.4/34c33a97-ae07-42e9-8025-9076e2bce3bb.json deleted file mode 100644 index 05a4f1ee29c43947873a3c1b49db530b83bbcf88..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.4/34c33a97-ae07-42e9-8025-9076e2bce3bb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.4/1762652580.284807", - "retrieved_timestamp": "1762652580.284807", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6078748830879843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5467076263362468 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2809667673716012 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47138541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44190492021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.5/bd4ff159-0bf9-4fe1-8cc8-9f3d7bb47bbc.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.5/bd4ff159-0bf9-4fe1-8cc8-9f3d7bb47bbc.json deleted file mode 100644 index 1d0e7896eddf638c8296c6c5058d0be0453d8649..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.5/bd4ff159-0bf9-4fe1-8cc8-9f3d7bb47bbc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.5/1762652580.2850199", - "retrieved_timestamp": "1762652580.2850208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5650352176669016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5522599149696679 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2756797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49820833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44481382978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.7/4aa966fc-ee99-430c-8688-99565f5e6fcc.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.7/4aa966fc-ee99-430c-8688-99565f5e6fcc.json deleted file mode 100644 index c8a4157eee60f6da337e0283fae31bb04db3bb46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.7/4aa966fc-ee99-430c-8688-99565f5e6fcc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.7/1762652580.285239", - "retrieved_timestamp": "1762652580.285239", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4201551882338861 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5391718355132782 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.29154078549848944 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48484375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42802526595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.8/e908901d-c122-4458-9d4e-9a7d1242211c.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.8/e908901d-c122-4458-9d4e-9a7d1242211c.json deleted file mode 100644 index 0b8cd3b99eb8df166f9d25d516855e4d781535b0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.8/e908901d-c122-4458-9d4e-9a7d1242211c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.8/1762652580.2854452", - "retrieved_timestamp": "1762652580.285446", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6255601803215468 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5446899383425835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.270392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.47671875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343417553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.0/e9350de5-cae6-46bc-a83f-0e6e65eae4e3.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.0/e9350de5-cae6-46bc-a83f-0e6e65eae4e3.json deleted file mode 100644 index eaf7786d9bf99bdad050033ce6f9015f35428512..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.0/e9350de5-cae6-46bc-a83f-0e6e65eae4e3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.0/1762652580.285652", - "retrieved_timestamp": "1762652580.2856529", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.0", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5331365222055258 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5659918212629057 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2862537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42776041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4566156914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.1/769eabf2-4c12-4a48-8ec2-7dacf50a28f0.json 
b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.1/769eabf2-4c12-4a48-8ec2-7dacf50a28f0.json deleted file mode 100644 index af46d71bb41bf4455a026052845a7842aeb2d1a7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.1/769eabf2-4c12-4a48-8ec2-7dacf50a28f0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.1/1762652580.285865", - "retrieved_timestamp": "1762652580.285865", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4329445870290828 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5478077656573704 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48081250000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4354222074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.2/8c4531a4-4418-4090-9c82-f60bcf8d9935.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.2/8c4531a4-4418-4090-9c82-f60bcf8d9935.json deleted file mode 100644 index b35d6d1df11e3b8ac82921f54a9ba834892cffa9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.2/8c4531a4-4418-4090-9c82-f60bcf8d9935.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.2/1762652580.286082", - "retrieved_timestamp": "1762652580.286083", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42025492360270744 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5533340429711561 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2847432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46878125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.3/a5c9246f-a7b5-4183-9a64-93151b536945.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.3/a5c9246f-a7b5-4183-9a64-93151b536945.json deleted file mode 100644 index ebafcd423c6f5e20d0e2773f04a719fa15d910f0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.3/a5c9246f-a7b5-4183-9a64-93151b536945.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.3/1762652580.286303", - "retrieved_timestamp": "1762652580.286304", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4218540140161438 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5531852688351706 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104229607250755 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4700520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44697473404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.4/1faf58ba-28e7-45a1-bc2c-d0aa707a49aa.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.4/1faf58ba-28e7-45a1-bc2c-d0aa707a49aa.json deleted file mode 100644 index 8554a40d07a04e14cd4c9a95e5f5a237692f5708..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.4/1faf58ba-28e7-45a1-bc2c-d0aa707a49aa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.4/1762652580.286527", - "retrieved_timestamp": "1762652580.2865438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4545018329144448 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5581962445576828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29229607250755285 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46220833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4457280585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.5/b347eea5-e676-478e-b0ee-d53abf2c8697.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.5/b347eea5-e676-478e-b0ee-d53abf2c8697.json deleted file mode 100644 index b768d92651f0dcb6aef398fd0d4e34fcbf5a7122..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.5/b347eea5-e676-478e-b0ee-d53abf2c8697.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.5/1762652580.286995", - "retrieved_timestamp": "1762652580.286996", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/Qwen2.5-7B-olm-v1.5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/Qwen2.5-7B-olm-v1.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4546514359676769 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5543943528577703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28172205438066467 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4539270833333333 
- } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43991023936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_jeffmeloy_Qwen2.5-7B-minperplexity-1/ba005ac7-761f-4cd7-91ed-34b88028240f.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_jeffmeloy_Qwen2.5-7B-minperplexity-1/ba005ac7-761f-4cd7-91ed-34b88028240f.json deleted file mode 100644 index cba0021742962828b4a45f76ad09e41cca047ee3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_jeffmeloy_Qwen2.5-7B-minperplexity-1/ba005ac7-761f-4cd7-91ed-34b88028240f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeffmeloy_jeffmeloy_Qwen2.5-7B-minperplexity-1/1762652580.2872581", - "retrieved_timestamp": "1762652580.2872589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37571643239936703 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5582354546195324 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29154078549848944 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42903125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4367519946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/kayfour_T3Q-Qwen2.5-7B-it-KOR-Safe/35e56ec7-deae-4674-abfc-3c45f5dec040.json b/leaderboard_data/HFOpenLLMv2/alibaba/kayfour_T3Q-Qwen2.5-7B-it-KOR-Safe/35e56ec7-deae-4674-abfc-3c45f5dec040.json deleted file mode 100644 index 3c6db091a426c466a0232c5e90a146ac98636798..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/kayfour_T3Q-Qwen2.5-7B-it-KOR-Safe/35e56ec7-deae-4674-abfc-3c45f5dec040.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kayfour_T3Q-Qwen2.5-7B-it-KOR-Safe/1762652580.3057542", - "retrieved_timestamp": "1762652580.305755", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6081497094376255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5549941776226351 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37613293051359514 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42772916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44639295212765956 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/kms7530_chemeng_qwen-math-7b_24_1_100_1/af7f201f-3af3-4ffb-9416-c83235851cb6.json b/leaderboard_data/HFOpenLLMv2/alibaba/kms7530_chemeng_qwen-math-7b_24_1_100_1/af7f201f-3af3-4ffb-9416-c83235851cb6.json deleted file mode 100644 index 02c9067aece085852d29101a105ea9010eb80ed8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/kms7530_chemeng_qwen-math-7b_24_1_100_1/af7f201f-3af3-4ffb-9416-c83235851cb6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/kms7530_chemeng_qwen-math-7b_24_1_100_1/1762652580.310198", - "retrieved_timestamp": "1762652580.310199", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kms7530/chemeng_qwen-math-7b_24_1_100_1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "kms7530/chemeng_qwen-math-7b_24_1_100_1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.211052230304481 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3578007894497858 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21584109042553193 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 8.911 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/kms7530_chemeng_qwen-math-7b_24_1_100_1_nonmath/8ae7c857-be7e-463e-86c2-6b165920a45c.json b/leaderboard_data/HFOpenLLMv2/alibaba/kms7530_chemeng_qwen-math-7b_24_1_100_1_nonmath/8ae7c857-be7e-463e-86c2-6b165920a45c.json deleted file mode 100644 index 465479cffaca0afb4969d635252d705889f485ce..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/kms7530_chemeng_qwen-math-7b_24_1_100_1_nonmath/8ae7c857-be7e-463e-86c2-6b165920a45c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kms7530_chemeng_qwen-math-7b_24_1_100_1_nonmath/1762652580.310462", - "retrieved_timestamp": "1762652580.310463", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25836336476105626 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3892856967853256 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30966767371601206 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40869791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24517952127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 15.231 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_212_QwenLawLo/c4f888d2-c08c-43c4-a1f9-79edf519c893.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_212_QwenLawLo/c4f888d2-c08c-43c4-a1f9-79edf519c893.json deleted file mode 100644 index 6212f6dc0218e0b6e0a51fb8196281f745e713e8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_212_QwenLawLo/c4f888d2-c08c-43c4-a1f9-79edf519c893.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_212_QwenLawLo/1762652580.322983", - "retrieved_timestamp": "1762652580.322984", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lkoenig/BBAI_212_QwenLawLo", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_212_QwenLawLo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4566250880995758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5574113357405873 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3602719033232628 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43696874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44888630319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_212_Qwencore/d42a520c-15dd-4497-a26a-b6f77b3257e6.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_212_Qwencore/d42a520c-15dd-4497-a26a-b6f77b3257e6.json deleted file mode 100644 index f5d94f72156803249b68f1926a1d22525d07e54c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_212_Qwencore/d42a520c-15dd-4497-a26a-b6f77b3257e6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_212_Qwencore/1762652580.3232372", - "retrieved_timestamp": "1762652580.323238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lkoenig/BBAI_212_Qwencore", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_212_Qwencore" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4384400058511416 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.556868234536878 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34894259818731116 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.448969414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_230_Xiaqwen/c9393ea7-3269-435f-9159-95638b9c691e.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_230_Xiaqwen/c9393ea7-3269-435f-9159-95638b9c691e.json deleted file mode 100644 index de42fa574556c460b5a17f6b6548e19efed6efab..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_230_Xiaqwen/c9393ea7-3269-435f-9159-95638b9c691e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_230_Xiaqwen/1762652580.3234491", - "retrieved_timestamp": "1762652580.32345", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lkoenig/BBAI_230_Xiaqwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_230_Xiaqwen" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4648931501748693 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.557779565750489 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36631419939577037 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4422083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4480551861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - 
} -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_375_QwenDyancabs/08e49740-3cdd-47b2-9b95-b96d8a13dd79.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_375_QwenDyancabs/08e49740-3cdd-47b2-9b95-b96d8a13dd79.json deleted file mode 100644 index 68a2b376196d0bfb964743fa3656ce58e38f1cb9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_375_QwenDyancabs/08e49740-3cdd-47b2-9b95-b96d8a13dd79.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_375_QwenDyancabs/1762652580.323661", - "retrieved_timestamp": "1762652580.323662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lkoenig/BBAI_375_QwenDyancabs", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_375_QwenDyancabs" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4565752204151651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5571383122938682 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44617708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4476396276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_456_QwenKoen/249b0b65-5c71-4c5d-9802-28df0ead0cdf.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_456_QwenKoen/249b0b65-5c71-4c5d-9802-28df0ead0cdf.json deleted file mode 100644 index 2f2d1e8b7c820a7fd8983e1f664a15d18b1073b1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_456_QwenKoen/249b0b65-5c71-4c5d-9802-28df0ead0cdf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/lkoenig_BBAI_456_QwenKoen/1762652580.323869", - "retrieved_timestamp": "1762652580.323869", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lkoenig/BBAI_456_QwenKoen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_456_QwenKoen" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45292823042859615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5552713612233481 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395104166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4468916223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_KoenQwenDyan/fe084d09-ee80-4c7f-93a7-3ee0f9081177.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_KoenQwenDyan/fe084d09-ee80-4c7f-93a7-3ee0f9081177.json deleted file mode 100644 index 1b8d76394474ea8bbd5d8c6de1d55c9d4f47e061..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_KoenQwenDyan/fe084d09-ee80-4c7f-93a7-3ee0f9081177.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_KoenQwenDyan/1762652580.324076", - "retrieved_timestamp": "1762652580.3240771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lkoenig/BBAI_7B_KoenQwenDyan", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_7B_KoenQwenDyan" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5807224830117421 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5536566841353078 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37386706948640486 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43687499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44597739361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_Qwen2.5koen/078cedea-7b3a-4c77-b932-3d42f0c841fe.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_Qwen2.5koen/078cedea-7b3a-4c77-b932-3d42f0c841fe.json deleted file mode 100644 index cb083261a0289756a30307350f5fcedef18874b5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_Qwen2.5koen/078cedea-7b3a-4c77-b932-3d42f0c841fe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_Qwen2.5koen/1762652580.324276", - "retrieved_timestamp": "1762652580.324277", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lkoenig/BBAI_7B_Qwen2.5koen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_7B_Qwen2.5koen" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45999725173650363 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5544031312134464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36555891238670696 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43690625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4484707446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_QwenDyanKoenLo/dedc34ed-fd8f-4b29-b898-3c9830993247.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_QwenDyanKoenLo/dedc34ed-fd8f-4b29-b898-3c9830993247.json deleted file mode 100644 index 1cb7e6f8344f31a49a8ce206ecc63c56a299f343..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_QwenDyanKoenLo/dedc34ed-fd8f-4b29-b898-3c9830993247.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_QwenDyanKoenLo/1762652580.324512", - "retrieved_timestamp": "1762652580.324513", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lkoenig/BBAI_7B_QwenDyanKoenLo", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_7B_QwenDyanKoenLo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46631714960748594 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5562461525503201 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640483383685801 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4464760638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_QwenDyancabsLAW/05f391f3-68ac-422a-b7e8-01eba1729a0b.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_QwenDyancabsLAW/05f391f3-68ac-422a-b7e8-01eba1729a0b.json deleted file mode 100644 index f21cabfb35e1d6a79c5e1cdc43f71214a2b7837f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_QwenDyancabsLAW/05f391f3-68ac-422a-b7e8-01eba1729a0b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_QwenDyancabsLAW/1762652580.3247318", - "retrieved_timestamp": "1762652580.3247318", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lkoenig/BBAI_7B_QwenDyancabsLAW", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_7B_QwenDyancabsLAW" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5549685944405289 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5578836606885887 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3678247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4461145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4471409574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/macadeliccc_Samantha-Qwen-2-7B/c443492e-3b5f-4394-9fbb-761dba338638.json b/leaderboard_data/HFOpenLLMv2/alibaba/macadeliccc_Samantha-Qwen-2-7B/c443492e-3b5f-4394-9fbb-761dba338638.json deleted file mode 100644 index 4cc97048dbf7f222b96d98be74fc332b3e572500..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/macadeliccc_Samantha-Qwen-2-7B/c443492e-3b5f-4394-9fbb-761dba338638.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/macadeliccc_Samantha-Qwen-2-7B/1762652580.3290062", - "retrieved_timestamp": "1762652580.3290062", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "macadeliccc/Samantha-Qwen-2-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "macadeliccc/Samantha-Qwen-2-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4377152621710395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5082341412476951 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4799479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3779089095744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.1/19b72caf-a841-4928-98c3-c505694724c3.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.1/19b72caf-a841-4928-98c3-c505694724c3.json deleted file mode 100644 index a2f51c6d66b0f425dbb584085b191955259a16d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.1/19b72caf-a841-4928-98c3-c505694724c3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.1/1762652580.333172", - "retrieved_timestamp": "1762652580.333172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7444868504457063 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.555919540267728 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4073333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.429936835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.3/36b2821f-5fa6-4384-9ddc-6cbc5b52321c.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.3/36b2821f-5fa6-4384-9ddc-6cbc5b52321c.json deleted file mode 100644 index 187ceb497b30b22b3182bd139360bf5d3a54c5ff..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.3/36b2821f-5fa6-4384-9ddc-6cbc5b52321c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.3/1762652580.333376", - "retrieved_timestamp": "1762652580.3333771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.704320092909037 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5516165586639877 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47583081570996977 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43105208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44398271276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST/80d3a785-dde1-44fa-b6e1-93722849fdb1.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST/80d3a785-dde1-44fa-b6e1-93722849fdb1.json deleted file mode 100644 index 5214db02a37f881360138ef5a116acb0f1d5e0ee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST/80d3a785-dde1-44fa-b6e1-93722849fdb1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-MST/1762652580.332918", - "retrieved_timestamp": "1762652580.3329191", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-MST", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7488330961847898 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5458495423775734 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4244712990936556 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3913645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41630651595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-Preview/6bfc8cf9-e615-4447-bc6e-ff96752dc5fb.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-Preview/6bfc8cf9-e615-4447-bc6e-ff96752dc5fb.json deleted file mode 100644 index 81c9ade0e28c1ed0bb70d02f9e0a281d89e03794..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-Preview/6bfc8cf9-e615-4447-bc6e-ff96752dc5fb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-Preview/1762652580.333591", - "retrieved_timestamp": "1762652580.3335922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-Preview", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7962439660101863 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431064770878757 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4298125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43641954787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-RP-v1.4-1M/feefc068-9257-4d0f-ac55-acd08ededeca.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-RP-v1.4-1M/feefc068-9257-4d0f-ac55-acd08ededeca.json deleted file mode 100644 index b1be1b278b0bbdcc65a58622cbd334aa6a1c05b2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-RP-v1.4-1M/feefc068-9257-4d0f-ac55-acd08ededeca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-RP-v1.4-1M/1762652580.333802", - "retrieved_timestamp": "1762652580.333802", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7727884236049238 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5295123017150106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3368580060422961 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44327083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4134807180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.1/25d6c4bd-6540-43cb-a682-77d4fa4eb64e.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.1/25d6c4bd-6540-43cb-a682-77d4fa4eb64e.json deleted file mode 100644 index 39aedf392c7cc6c84c554559c1e602235ef565a1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.1/25d6c4bd-6540-43cb-a682-77d4fa4eb64e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.1/1762652580.334015", - "retrieved_timestamp": "1762652580.334016", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7889499860370484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383575636307666 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4179375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4227061170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.2/6e342711-8d2d-42ed-a019-11be429e10d8.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.2/6e342711-8d2d-42ed-a019-11be429e10d8.json 
deleted file mode 100644 index 937c6a72e90d927a3900845bcdbad6a6a97a5925..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.2/6e342711-8d2d-42ed-a019-11be429e10d8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.2/1762652580.334213", - "retrieved_timestamp": "1762652580.334214", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7865020368178655 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.540250407222091 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44033232628398794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.421875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197140957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.3/1af605c0-ec58-4651-a57a-2fd7d0cd5a67.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.3/1af605c0-ec58-4651-a57a-2fd7d0cd5a67.json deleted file mode 100644 index 3a2036c672ad1fdfc3fa51140624850c72aa01e4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.3/1af605c0-ec58-4651-a57a-2fd7d0cd5a67.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.3/1762652580.334473", - "retrieved_timestamp": "1762652580.334474", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7856276900845313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5326893189699237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42463541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43450797872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.4/fd65e319-bc38-457b-9913-9a2214e69823.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.4/fd65e319-bc38-457b-9913-9a2214e69823.json deleted file mode 100644 index e687f5eab45d255345b8253bcacb37d2386b505c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.4/fd65e319-bc38-457b-9913-9a2214e69823.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.4/1762652580.334734", - "retrieved_timestamp": "1762652580.3347352", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7834545672149895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.54229983590397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4231770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4195478723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Qwen2.5-7B-Preview/56032f8a-b733-4b1f-acbc-78d0d1ddf2a5.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Qwen2.5-7B-Preview/56032f8a-b733-4b1f-acbc-78d0d1ddf2a5.json deleted file mode 100644 index 29d967667462660341a43ba40dbf71d5c0b89aa2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Qwen2.5-7B-Preview/56032f8a-b733-4b1f-acbc-78d0d1ddf2a5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Qwen2.5-7B-Preview/1762652580.334959", - "retrieved_timestamp": "1762652580.334959", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/Qwen2.5-7B-Preview", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Qwen2.5-7B-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7679423928509688 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359781834039953 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34441087613293053 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42578125 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Yell-Qwen2.5-7B-Preview-v1.1/be0058b1-23b2-40b7-b336-ab40bf82c997.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Yell-Qwen2.5-7B-Preview-v1.1/be0058b1-23b2-40b7-b336-ab40bf82c997.json deleted file mode 100644 index 4d7920fac0775b5c1f2f3fffdcdb350d380b3c46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Yell-Qwen2.5-7B-Preview-v1.1/be0058b1-23b2-40b7-b336-ab40bf82c997.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Yell-Qwen2.5-7B-Preview-v1.1/1762652580.335416", - "retrieved_timestamp": "1762652580.335417", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5757013612769672 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5347734083768815 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18957703927492447 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4059375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38314494680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Yell-Qwen2.5-7B-Preview/f47334f2-f0ab-48f5-814e-f3ede36802d9.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Yell-Qwen2.5-7B-Preview/f47334f2-f0ab-48f5-814e-f3ede36802d9.json deleted file mode 100644 index de5d1157f3a148da79e37f3e45b6f7bb5aed36a2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Yell-Qwen2.5-7B-Preview/f47334f2-f0ab-48f5-814e-f3ede36802d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Yell-Qwen2.5-7B-Preview/1762652580.335188", - "retrieved_timestamp": "1762652580.335188", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/Yell-Qwen2.5-7B-Preview", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "marcuscedricridia/Yell-Qwen2.5-7B-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5838696879834395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.537136379549371 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40463541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37982047872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end 
of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/maywell_Qwen2-7B-Multilingual-RP/fd91f8aa-a521-4e9b-824a-aa21adade569.json b/leaderboard_data/HFOpenLLMv2/alibaba/maywell_Qwen2-7B-Multilingual-RP/fd91f8aa-a521-4e9b-824a-aa21adade569.json deleted file mode 100644 index 9bbede6a50bbe28ddbf4efbfdeb5965877639638..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/maywell_Qwen2-7B-Multilingual-RP/fd91f8aa-a521-4e9b-824a-aa21adade569.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/maywell_Qwen2-7B-Multilingual-RP/1762652580.342533", - "retrieved_timestamp": "1762652580.3425338", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "maywell/Qwen2-7B-Multilingual-RP", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "maywell/Qwen2-7B-Multilingual-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4347176602525743 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5062058680861069 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3695625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3858876329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/mergekit-community_SuperQwen-2.5-1.5B/95d33475-a71b-41d6-a08d-3da30e631897.json b/leaderboard_data/HFOpenLLMv2/alibaba/mergekit-community_SuperQwen-2.5-1.5B/95d33475-a71b-41d6-a08d-3da30e631897.json deleted file mode 100644 index 1e8c944c05d6ccb2b0b12292dad4778e86dafd68..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/mergekit-community_SuperQwen-2.5-1.5B/95d33475-a71b-41d6-a08d-3da30e631897.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/mergekit-community_SuperQwen-2.5-1.5B/1762652580.346312", - "retrieved_timestamp": "1762652580.346313", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mergekit-community/SuperQwen-2.5-1.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "mergekit-community/SuperQwen-2.5-1.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336409615376091 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2906897601443365 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3355208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10746343085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/mhl1_Qwen2.5-0.5B-cinstruct-stage1/bf9d8219-66b9-4c77-8c6d-2983e60dc2cb.json b/leaderboard_data/HFOpenLLMv2/alibaba/mhl1_Qwen2.5-0.5B-cinstruct-stage1/bf9d8219-66b9-4c77-8c6d-2983e60dc2cb.json deleted file mode 100644 index e854fd70171a11618d42c6c28532ee9b7cd452e6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/mhl1_Qwen2.5-0.5B-cinstruct-stage1/bf9d8219-66b9-4c77-8c6d-2983e60dc2cb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mhl1_Qwen2.5-0.5B-cinstruct-stage1/1762652580.3535528", - "retrieved_timestamp": "1762652580.353554", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mhl1/Qwen2.5-0.5B-cinstruct-stage1", - "developer": "alibaba", - 
"inference_platform": "unknown", - "id": "mhl1/Qwen2.5-0.5B-cinstruct-stage1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14817905379947427 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32557832478283544 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35003125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11394614361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/migtissera_Tess-v2.5.2-Qwen2-72B/34b9dd9e-dc03-4354-b016-3b1463a902f9.json b/leaderboard_data/HFOpenLLMv2/alibaba/migtissera_Tess-v2.5.2-Qwen2-72B/34b9dd9e-dc03-4354-b016-3b1463a902f9.json deleted file mode 100644 index a5fdfff157fee40c9ae6f9fe9594ba9a5e615d2d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/migtissera_Tess-v2.5.2-Qwen2-72B/34b9dd9e-dc03-4354-b016-3b1463a902f9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/migtissera_Tess-v2.5.2-Qwen2-72B/1762652580.359263", - "retrieved_timestamp": "1762652580.359264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "migtissera/Tess-v2.5.2-Qwen2-72B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "migtissera/Tess-v2.5.2-Qwen2-72B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44943084349525925 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.6646791891060648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2938066465256798 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41883333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5561003989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/minghaowu_Qwen1.5-1.8B-OpenHermes-2.5/cf3f376a-92ec-4678-a57a-cee2e40032a5.json b/leaderboard_data/HFOpenLLMv2/alibaba/minghaowu_Qwen1.5-1.8B-OpenHermes-2.5/cf3f376a-92ec-4678-a57a-cee2e40032a5.json deleted file mode 100644 index ec0299a7859b18fc8f9a3100d884a2e1bc190deb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/minghaowu_Qwen1.5-1.8B-OpenHermes-2.5/cf3f376a-92ec-4678-a57a-cee2e40032a5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/minghaowu_Qwen1.5-1.8B-OpenHermes-2.5/1762652580.360414", - "retrieved_timestamp": "1762652580.360415", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "minghaowu/Qwen1.5-1.8B-OpenHermes-2.5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "minghaowu/Qwen1.5-1.8B-OpenHermes-2.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27779735546128714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33746396801266015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17918882978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.837 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Qwen-7B-v1.1/99d27765-a9c5-4f50-8bd1-c3ce67683621.json b/leaderboard_data/HFOpenLLMv2/alibaba/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Qwen-7B-v1.1/99d27765-a9c5-4f50-8bd1-c3ce67683621.json deleted file mode 100644 index ebd0d0ea1370c1e506f2326d8a2ce2bb66d71d7b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Qwen-7B-v1.1/99d27765-a9c5-4f50-8bd1-c3ce67683621.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Qwen-7B-v1.1/1762652580.371459", - "retrieved_timestamp": "1762652580.3714602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731512387132807 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36983762765044165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3496978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40088541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.23262965425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-1.5B/f2eaeee8-a75b-4d0f-9dcd-2a11c3de926b.json b/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-1.5B/f2eaeee8-a75b-4d0f-9dcd-2a11c3de926b.json deleted file mode 100644 index e03df7d08c3937224e5487266eb9ddf266dd6228..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-1.5B/f2eaeee8-a75b-4d0f-9dcd-2a11c3de926b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-1.5B/1762652580.377223", - "retrieved_timestamp": "1762652580.377223", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Dumpling-Qwen2.5-1.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nbeerbower/Dumpling-Qwen2.5-1.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3698963195432563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4159743091354106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37276041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2771775265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-14B/0a70cdb4-5ccc-40e2-bf99-3af619b8b7f6.json b/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-14B/0a70cdb4-5ccc-40e2-bf99-3af619b8b7f6.json deleted file mode 100644 index d6d272fcfb07907a9b8bb7c91a9006c6c4c7fb2f..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-14B/0a70cdb4-5ccc-40e2-bf99-3af619b8b7f6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-14B/1762652580.3774788", - "retrieved_timestamp": "1762652580.37748", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Dumpling-Qwen2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nbeerbower/Dumpling-Qwen2.5-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6064010159709571 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6450644262798378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30966767371601206 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43539583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5170378989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-7B-1k-r16/76e3f2a5-7545-4270-800d-6413e39608ad.json b/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-7B-1k-r16/76e3f2a5-7545-4270-800d-6413e39608ad.json deleted file mode 100644 index 82481c2e5742262e2213e437b4d28fe995ee1056..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-7B-1k-r16/76e3f2a5-7545-4270-800d-6413e39608ad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-7B-1k-r16/1762652580.3776908", - "retrieved_timestamp": "1762652580.377692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r16", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r16" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4860004787297703 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5214228032573378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.236404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4229895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39586103723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-7B-1k-r64-2e-5/2e6c1c46-01af-493a-a2ce-266d13b53000.json b/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-7B-1k-r64-2e-5/2e6c1c46-01af-493a-a2ce-266d13b53000.json deleted file mode 100644 index 2fc0fe24c055353299ab1db45082feafa83917ff..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-7B-1k-r64-2e-5/2e6c1c46-01af-493a-a2ce-266d13b53000.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-7B-1k-r64-2e-5/1762652580.377894", - "retrieved_timestamp": "1762652580.377894", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.417906709752346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5300548108450988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41215093085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_EVA-abliterated-TIES-Qwen2.5-1.5B/dea423e8-cdbd-4895-80af-f53dbb5caa1c.json b/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_EVA-abliterated-TIES-Qwen2.5-1.5B/dea423e8-cdbd-4895-80af-f53dbb5caa1c.json deleted file mode 100644 index 979ea7c06a189234e0878791b385f8952d1e7d80..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_EVA-abliterated-TIES-Qwen2.5-1.5B/dea423e8-cdbd-4895-80af-f53dbb5caa1c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_EVA-abliterated-TIES-Qwen2.5-1.5B/1762652580.378096", - "retrieved_timestamp": "1762652580.3780968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41148707651254224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39965589836197535 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.13746223564954682 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35018750000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27119348404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_EVA-abliterated-TIES-Qwen2.5-14B/997fc8c5-fc91-4e9e-a2b7-bdda77e4f4a7.json b/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_EVA-abliterated-TIES-Qwen2.5-14B/997fc8c5-fc91-4e9e-a2b7-bdda77e4f4a7.json deleted file mode 100644 index b0473b64607e855f4ac6a37f3f9ce73174ab44d1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_EVA-abliterated-TIES-Qwen2.5-14B/997fc8c5-fc91-4e9e-a2b7-bdda77e4f4a7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_EVA-abliterated-TIES-Qwen2.5-14B/1762652580.378304", - "retrieved_timestamp": "1762652580.378304", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.783554302583811 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6372016353633118 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4406666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5211103723404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Qwen2.5-Gutenberg-Doppel-14B/649483fb-4b54-4824-82eb-e78e55e53912.json b/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Qwen2.5-Gutenberg-Doppel-14B/649483fb-4b54-4824-82eb-e78e55e53912.json deleted file mode 100644 index e7fb6674d4bcfa7d92b24abe83ee775a87fdc043..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Qwen2.5-Gutenberg-Doppel-14B/649483fb-4b54-4824-82eb-e78e55e53912.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Qwen2.5-Gutenberg-Doppel-14B/1762652580.38376", - "retrieved_timestamp": "1762652580.38376", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Qwen2.5-Gutenberg-Doppel-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nbeerbower/Qwen2.5-Gutenberg-Doppel-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8090832324897937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6381735755183319 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5415407854984894 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4100625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49210438829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/0d99e863-596f-43b7-932e-a4a27435e63d.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/0d99e863-596f-43b7-932e-a4a27435e63d.json deleted file mode 100644 index fda4d8a3fff333b451d2fe7423fa663e49ef1243..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/0d99e863-596f-43b7-932e-a4a27435e63d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/1762652580.391702", - "retrieved_timestamp": "1762652580.3917031", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11500596195871399 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28767781029884354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10895944148936171 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/399b43e8-3c07-4f3d-8b3e-50b8acd96e78.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/399b43e8-3c07-4f3d-8b3e-50b8acd96e78.json deleted file mode 100644 index 9b126a9169afd8a9e20d7fae23126e9ca1c017f6..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/399b43e8-3c07-4f3d-8b3e-50b8acd96e78.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/1762652580.400365", - "retrieved_timestamp": "1762652580.400365", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5878413720040603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5236664966992856 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3376132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39257291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.390375664893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7b-MFANN-slerp/d621c163-5ca6-4e54-8913-d931e4a2c6b9.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7b-MFANN-slerp/d621c163-5ca6-4e54-8913-d931e4a2c6b9.json deleted file mode 100644 index a7485b3da014c082981dd677fd7ed343d62d6f67..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7b-MFANN-slerp/d621c163-5ca6-4e54-8913-d931e4a2c6b9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-7b-MFANN-slerp/1762652580.4005811", - "retrieved_timestamp": "1762652580.4005818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/Qwen2.5-7b-MFANN-slerp", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/Qwen2.5-7b-MFANN-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6532123654126606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088729928004616 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40730208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3416722074468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7b-nerd-uncensored-MFANN-slerp/170aa8c2-6b80-44d3-9d22-c1a5f7fa2ad4.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7b-nerd-uncensored-MFANN-slerp/170aa8c2-6b80-44d3-9d22-c1a5f7fa2ad4.json deleted file mode 100644 index 05ceac128f17d595ea8003705fe090d581cae3f2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7b-nerd-uncensored-MFANN-slerp/170aa8c2-6b80-44d3-9d22-c1a5f7fa2ad4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-7b-nerd-uncensored-MFANN-slerp/1762652580.4007921", - "retrieved_timestamp": "1762652580.400793", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15644711587476784 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2920111436321769 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3791770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11003989361702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/2f89ceb3-8bc1-48f0-a4cb-3dc1b8acad87.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/2f89ceb3-8bc1-48f0-a4cb-3dc1b8acad87.json deleted file mode 100644 index d7c09d8ddb9e9013c6e51aefd6070807a5ae285a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/2f89ceb3-8bc1-48f0-a4cb-3dc1b8acad87.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/1762652580.4012349", - "retrieved_timestamp": "1762652580.401236", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6486411610083467 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5065573474607916 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2990936555891239 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41520833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3431682180851064 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/bbd39707-6062-461a-8e09-c8b8bc3451f7.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/bbd39707-6062-461a-8e09-c8b8bc3451f7.json deleted file mode 100644 index 1bf9713163d77b44582f11f094b04e17fb079455..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/bbd39707-6062-461a-8e09-c8b8bc3451f7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/1762652580.4010181", - "retrieved_timestamp": "1762652580.4010189", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5742274941599401 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5071448530886461 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40584375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3156582446808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/9b2011ae-9d22-42be-a10b-6ce6e8ff1be4.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/9b2011ae-9d22-42be-a10b-6ce6e8ff1be4.json deleted file mode 100644 index 041b176694f7321ddb2e3cfa9b9aee4ec1c42fe4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/9b2011ae-9d22-42be-a10b-6ce6e8ff1be4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/1762652580.401459", - "retrieved_timestamp": "1762652580.40146", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2675556412540947 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37890218644722085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23238255033557048 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35279166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.16771941489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-MFANN-7b/b6578885-9721-4349-ad55-5a80fd054c85.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-MFANN-7b/b6578885-9721-4349-ad55-5a80fd054c85.json deleted file mode 100644 index 86c2feaea062deaa174faabc64c3f922dd5b36a1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-MFANN-7b/b6578885-9721-4349-ad55-5a80fd054c85.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-MFANN-7b/1762652580.401672", - "retrieved_timestamp": "1762652580.401673", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/Qwen2.5-MFANN-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/Qwen2.5-MFANN-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6097233119234742 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5054347004252888 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27870090634441086 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4020625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32330452127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-SLERP-V1.2/dfacdde9-fd5d-496f-8038-aa0439c0c991.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-SLERP-V1.2/dfacdde9-fd5d-496f-8038-aa0439c0c991.json deleted file mode 100644 index 65431ed088f11c05e2e98ed4a76766a817f124a0..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-SLERP-V1.2/dfacdde9-fd5d-496f-8038-aa0439c0c991.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_qwen2.5-MFANN-7b-SLERP-V1.2/1762652580.40188", - "retrieved_timestamp": "1762652580.40188", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/qwen2.5-MFANN-7b-SLERP-V1.2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/qwen2.5-MFANN-7b-SLERP-V1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6606060807546199 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5111030308243185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4259375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34383311170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-SLERPv1.1/0e66b7a6-bd6f-48f7-95e2-c117e0ea468f.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-SLERPv1.1/0e66b7a6-bd6f-48f7-95e2-c117e0ea468f.json deleted file mode 100644 index 1fe39bbdaff7bb1ffb154f47cef34f62267cf924..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-SLERPv1.1/0e66b7a6-bd6f-48f7-95e2-c117e0ea468f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_qwen2.5-MFANN-7b-SLERPv1.1/1762652580.402082", - "retrieved_timestamp": "1762652580.4020832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/qwen2.5-MFANN-7b-SLERPv1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/qwen2.5-MFANN-7b-SLERPv1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6554852236510238 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5074761993537673 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41263541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34483045212765956 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-v1.1/845f96b7-62dc-4ebc-aa62-fcc6263e437f.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-v1.1/845f96b7-62dc-4ebc-aa62-fcc6263e437f.json deleted file mode 100644 index b67c552126ee2c45e4d0666ee3fbb7e0fd40277c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-v1.1/845f96b7-62dc-4ebc-aa62-fcc6263e437f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_qwen2.5-MFANN-7b-v1.1/1762652580.402283", - "retrieved_timestamp": "1762652580.4022841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/qwen2.5-MFANN-7b-v1.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "netcat420/qwen2.5-MFANN-7b-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6088489651901399 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49666375554657477 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2824773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41139583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3248005319148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.3-Qwen2.5-7B/0bc5145c-90d0-4a8b-89c6-0b03aa9d0ee1.json b/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.3-Qwen2.5-7B/0bc5145c-90d0-4a8b-89c6-0b03aa9d0ee1.json deleted file mode 100644 index d727387b977885ca419683e5316700b2e02dc922..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.3-Qwen2.5-7B/0bc5145c-90d0-4a8b-89c6-0b03aa9d0ee1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/newsbang_Homer-v0.3-Qwen2.5-7B/1762652580.4035761", - "retrieved_timestamp": "1762652580.403577", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "newsbang/Homer-v0.3-Qwen2.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "newsbang/Homer-v0.3-Qwen2.5-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5154013572875525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5480594290467807 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30891238670694865 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47436458333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445561835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.4-Qwen2.5-7B/9a022bdc-d1b8-4f2e-a1af-6cd3bad6bded.json b/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.4-Qwen2.5-7B/9a022bdc-d1b8-4f2e-a1af-6cd3bad6bded.json deleted file mode 100644 index a61aa977ee71b564bcf0d8932cc9864969b62808..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.4-Qwen2.5-7B/9a022bdc-d1b8-4f2e-a1af-6cd3bad6bded.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/newsbang_Homer-v0.4-Qwen2.5-7B/1762652580.403887", - "retrieved_timestamp": "1762652580.4038882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "newsbang/Homer-v0.4-Qwen2.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "newsbang/Homer-v0.4-Qwen2.5-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.799940823681166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5533099174800821 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27794561933534745 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4310833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.4362533244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.5-Qwen2.5-7B/dc22ad83-0752-4f5e-97ac-733ef6c6cf53.json b/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.5-Qwen2.5-7B/dc22ad83-0752-4f5e-97ac-733ef6c6cf53.json deleted file mode 100644 index 574b390dca820bf400006148e8800383450013bf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.5-Qwen2.5-7B/dc22ad83-0752-4f5e-97ac-733ef6c6cf53.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/newsbang_Homer-v0.5-Qwen2.5-7B/1762652580.404095", - "retrieved_timestamp": "1762652580.404096", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "newsbang/Homer-v0.5-Qwen2.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "newsbang/Homer-v0.5-Qwen2.5-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7880756393037142 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5540181073562815 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723564954682779 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41930208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4369182180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v1.0-Qwen2.5-72B/3ebdda73-1c41-4a98-b3cf-ac5d482c8b5c.json b/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v1.0-Qwen2.5-72B/3ebdda73-1c41-4a98-b3cf-ac5d482c8b5c.json deleted file mode 100644 index b5e681cdd93e808cda6d95040eb3f538e0cc4da5..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v1.0-Qwen2.5-72B/3ebdda73-1c41-4a98-b3cf-ac5d482c8b5c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/newsbang_Homer-v1.0-Qwen2.5-72B/1762652580.404309", - "retrieved_timestamp": "1762652580.40431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "newsbang/Homer-v1.0-Qwen2.5-72B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "newsbang/Homer-v1.0-Qwen2.5-72B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7627716680629618 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7309799550978827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4901812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4161073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4677291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6145279255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v1.0-Qwen2.5-7B/1fe21571-0375-43c3-8071-1aaaf0223baa.json b/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v1.0-Qwen2.5-7B/1fe21571-0375-43c3-8071-1aaaf0223baa.json deleted file mode 100644 index 32a45823d91a39aa47170be3bcfe386e53757b9f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v1.0-Qwen2.5-7B/1fe21571-0375-43c3-8071-1aaaf0223baa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/newsbang_Homer-v1.0-Qwen2.5-7B/1762652580.404567", - "retrieved_timestamp": "1762652580.404568", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging 
Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "newsbang/Homer-v1.0-Qwen2.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "newsbang/Homer-v1.0-Qwen2.5-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6392737935344885 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5655254177370223 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42782291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45345744680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nguyentd_FinancialAdvice-Qwen2.5-7B/0ced7574-bfc4-4958-a6f5-0944f9ac411a.json b/leaderboard_data/HFOpenLLMv2/alibaba/nguyentd_FinancialAdvice-Qwen2.5-7B/0ced7574-bfc4-4958-a6f5-0944f9ac411a.json deleted file mode 100644 index ed71c4d43ac21157c98692f0d7a9583f7ca401a3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/nguyentd_FinancialAdvice-Qwen2.5-7B/0ced7574-bfc4-4958-a6f5-0944f9ac411a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nguyentd_FinancialAdvice-Qwen2.5-7B/1762652580.404779", - "retrieved_timestamp": "1762652580.4047801", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nguyentd/FinancialAdvice-Qwen2.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nguyentd/FinancialAdvice-Qwen2.5-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.449605934476079 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4730934153895792 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40248958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375249335106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nisten_franqwenstein-35b/3e3344d2-6911-4d5f-85d6-6593cbed3b49.json b/leaderboard_data/HFOpenLLMv2/alibaba/nisten_franqwenstein-35b/3e3344d2-6911-4d5f-85d6-6593cbed3b49.json deleted file mode 100644 index a9a9c6c96692eb991d9ae98cd56f6ccc98d599c6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/nisten_franqwenstein-35b/3e3344d2-6911-4d5f-85d6-6593cbed3b49.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nisten_franqwenstein-35b/1762652580.407119", - "retrieved_timestamp": "1762652580.40712", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nisten/franqwenstein-35b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nisten/franqwenstein-35b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39135383005979685 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6591132598701116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.304380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4681041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5610871010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 34.714 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nisten_franqwenstein-35b/ff90ed4a-6dcf-4b9b-9d3a-19f933e2c0c8.json b/leaderboard_data/HFOpenLLMv2/alibaba/nisten_franqwenstein-35b/ff90ed4a-6dcf-4b9b-9d3a-19f933e2c0c8.json deleted file mode 100644 index 7fe6c03cc07dd2d2731e8639745a5d82f63df8f0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/nisten_franqwenstein-35b/ff90ed4a-6dcf-4b9b-9d3a-19f933e2c0c8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nisten_franqwenstein-35b/1762652580.406877", - "retrieved_timestamp": "1762652580.406878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nisten/franqwenstein-35b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nisten/franqwenstein-35b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37986320740080765 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6646579178049268 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3406344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4035234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49402083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5730551861702128 - } - } - ], - 
"additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 34.714 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nisten_tqwendo-36b/3a5b1794-12f1-4004-bdb2-309cc950c757.json b/leaderboard_data/HFOpenLLMv2/alibaba/nisten_tqwendo-36b/3a5b1794-12f1-4004-bdb2-309cc950c757.json deleted file mode 100644 index 96a164ceef965dbce4791a19e016301b8c10d372..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/nisten_tqwendo-36b/3a5b1794-12f1-4004-bdb2-309cc950c757.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nisten_tqwendo-36b/1762652580.40731", - "retrieved_timestamp": "1762652580.4073112", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nisten/tqwendo-36b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "nisten/tqwendo-36b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6777672132164878 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6431830832659088 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41540785498489424 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44295833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380817819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 35.69 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Galactic-Qwen-14B-Exp1/26aea3e6-571c-4751-8b0f-40a86a144973.json b/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Galactic-Qwen-14B-Exp1/26aea3e6-571c-4751-8b0f-40a86a144973.json deleted file mode 100644 index e2a00f3ad6d0bd74b87198e80234d246ae171598..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Galactic-Qwen-14B-Exp1/26aea3e6-571c-4751-8b0f-40a86a144973.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Galactic-Qwen-14B-Exp1/1762652580.463281", - "retrieved_timestamp": "1762652580.463281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Galactic-Qwen-14B-Exp1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "prithivMLmods/Galactic-Qwen-14B-Exp1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5832202999153357 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6582262489447345 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40181268882175225 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4780520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539561170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Galactic-Qwen-14B-Exp2/2fcdb8f8-5ec6-494a-b690-fa96febdb02a.json b/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Galactic-Qwen-14B-Exp2/2fcdb8f8-5ec6-494a-b690-fa96febdb02a.json deleted file mode 100644 index 35bc920aca6df53b21dbbf47ac70591029ce7f67..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Galactic-Qwen-14B-Exp2/2fcdb8f8-5ec6-494a-b690-fa96febdb02a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Galactic-Qwen-14B-Exp2/1762652580.463546", - "retrieved_timestamp": "1762652580.463547", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"prithivMLmods/Galactic-Qwen-14B-Exp2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "prithivMLmods/Galactic-Qwen-14B-Exp2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6620300801872365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7203002699449659 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39932885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5353854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5690658244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Magellanic-Qwen-25B-R999/08bfcf7b-e051-4c64-b1ee-0044cfa166f0.json b/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Magellanic-Qwen-25B-R999/08bfcf7b-e051-4c64-b1ee-0044cfa166f0.json deleted file mode 100644 index c3a6c36c63387ae73010d1d81f14481bd52814dd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Magellanic-Qwen-25B-R999/08bfcf7b-e051-4c64-b1ee-0044cfa166f0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Magellanic-Qwen-25B-R999/1762652580.466958", - "retrieved_timestamp": "1762652580.466959", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Magellanic-Qwen-25B-R999", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "prithivMLmods/Magellanic-Qwen-25B-R999" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18727199386516663 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26075689808294905 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299867021276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 24.962 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen-7B-Distill-Reasoner/7afe076b-7f6a-42c1-9c43-652ea3ca94a9.json b/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen-7B-Distill-Reasoner/7afe076b-7f6a-42c1-9c43-652ea3ca94a9.json deleted file mode 100644 index 477c87b6adf4bfd6d160db22b5d489a24ece7459..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen-7B-Distill-Reasoner/7afe076b-7f6a-42c1-9c43-652ea3ca94a9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen-7B-Distill-Reasoner/1762652580.474049", - "retrieved_timestamp": "1762652580.47405", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Qwen-7B-Distill-Reasoner", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "prithivMLmods/Qwen-7B-Distill-Reasoner" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3395712265677292 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4409329229697952 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3950151057401813 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36596874999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2818317819148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen2.5-14B-DeepSeek-R1-1M/eacd8987-9631-4199-97ef-2cdc41879e8b.json b/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen2.5-14B-DeepSeek-R1-1M/eacd8987-9631-4199-97ef-2cdc41879e8b.json deleted file mode 100644 index ae9a366cf26c4fc97f2b1d47f109ca9165401219..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen2.5-14B-DeepSeek-R1-1M/eacd8987-9631-4199-97ef-2cdc41879e8b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen2.5-14B-DeepSeek-R1-1M/1762652580.474647", - "retrieved_timestamp": "1762652580.474647", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4192808415005519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5934849375153814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5128398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4606041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48994348404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen2.5-7B-DeepSeek-R1-1M/4edb337d-b56c-4009-9199-22223d4ff9f8.json b/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen2.5-7B-DeepSeek-R1-1M/4edb337d-b56c-4009-9199-22223d4ff9f8.json deleted file mode 100644 index 26092916bf186c6df506b4816c2962b688c3c282..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen2.5-7B-DeepSeek-R1-1M/4edb337d-b56c-4009-9199-22223d4ff9f8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen2.5-7B-DeepSeek-R1-1M/1762652580.474907", - "retrieved_timestamp": "1762652580.4749079", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18612282078219125 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3125554204779005 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3416875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009640957446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/qingy2024_Qwen2.5-4B/c332cc18-e556-4b23-a45d-df26c250faa2.json b/leaderboard_data/HFOpenLLMv2/alibaba/qingy2024_Qwen2.5-4B/c332cc18-e556-4b23-a45d-df26c250faa2.json deleted file mode 100644 index 
757becf25b37fe65b9f4c30032c6d82add809c5f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/qingy2024_Qwen2.5-4B/c332cc18-e556-4b23-a45d-df26c250faa2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-4B/1762652580.486805", - "retrieved_timestamp": "1762652580.486807", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/Qwen2.5-4B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "qingy2024/Qwen2.5-4B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21584839337402537 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4269378314466817 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46103125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2524933510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 4.168 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-Coder-V2.5-Qwen-14b/4f7b356a-1484-458c-8bc1-2640e039ab70.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-Coder-V2.5-Qwen-14b/4f7b356a-1484-458c-8bc1-2640e039ab70.json deleted file mode 100644 index ae24aa29dcc1fb1c249520f831271f363593dea5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-Coder-V2.5-Qwen-14b/4f7b356a-1484-458c-8bc1-2640e039ab70.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-Coder-V2.5-Qwen-14b/1762652580.496415", - "retrieved_timestamp": "1762652580.496416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rombodawg/Rombos-Coder-V2.5-Qwen-14b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-Coder-V2.5-Qwen-14b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7047445223119102 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6165135323666455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3300604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3939494680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-Coder-V2.5-Qwen-7b/ca077d1a-a122-4040-b7d9-924773ce67ca.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-Coder-V2.5-Qwen-7b/ca077d1a-a122-4040-b7d9-924773ce67ca.json deleted file mode 100644 index 4a2c44996aba8ad6151dbf5c3f1302065b8091c0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-Coder-V2.5-Qwen-7b/ca077d1a-a122-4040-b7d9-924773ce67ca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-Coder-V2.5-Qwen-7b/1762652580.4966788", - "retrieved_timestamp": "1762652580.49668", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rombodawg/Rombos-Coder-V2.5-Qwen-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-Coder-V2.5-Qwen-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6210388436016436 - 
} - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5077090028113894 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3979375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976063829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-0.5b/796ed438-2be4-45e6-9de9-c98ddd51f3d4.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-0.5b/796ed438-2be4-45e6-9de9-c98ddd51f3d4.json deleted file mode 100644 index 6800e6ecefbd8441545bf1dcef4009d7575e8c27..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-0.5b/796ed438-2be4-45e6-9de9-c98ddd51f3d4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-0.5b/1762652580.4969", - "retrieved_timestamp": "1762652580.4969", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-0.5b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-0.5b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28466690603155187 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32936751831436256 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32358333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18658577127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-1.5b/51f579c0-b5b4-4e01-9c19-b68fb6a21210.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-1.5b/51f579c0-b5b4-4e01-9c19-b68fb6a21210.json deleted file mode 100644 index 0a80ca51860a4a81f58c333f0984a052ed249a19..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-1.5b/51f579c0-b5b4-4e01-9c19-b68fb6a21210.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-1.5b/1762652580.497122", - "retrieved_timestamp": "1762652580.497123", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-1.5b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-1.5b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3402461025634206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4256703145864387 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2922207446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-14b/91ec838e-699a-4c68-aa42-a9f0b3b6b0c2.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-14b/91ec838e-699a-4c68-aa42-a9f0b3b6b0c2.json deleted file mode 100644 index 5e4103fafce4361dedf5d0d34c155c4dd149e164..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-14b/91ec838e-699a-4c68-aa42-a9f0b3b6b0c2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-14b/1762652580.4975061", - "retrieved_timestamp": "1762652580.497507", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-14b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-14b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5840447789642593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6481086261669653 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4554380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4717291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5375664893617021 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-32b/07e926c9-d8bb-41da-b41e-8fddc9fb99d8.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-32b/07e926c9-d8bb-41da-b41e-8fddc9fb99d8.json 
deleted file mode 100644 index 42247a7172be0ac286158541617bd67b4baff507..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-32b/07e926c9-d8bb-41da-b41e-8fddc9fb99d8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-32b/1762652580.497819", - "retrieved_timestamp": "1762652580.49782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-32b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-32b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6826631116548536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7045537070859799 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4954682779456193 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39681208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5034166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5915890957446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-3b/976e132a-8352-43fd-abdf-0fc4a04e9429.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-3b/976e132a-8352-43fd-abdf-0fc4a04e9429.json deleted file mode 100644 index d0af513b8282d284698b7eb4d2a39e39cd1b68e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-3b/976e132a-8352-43fd-abdf-0fc4a04e9429.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-3b/1762652580.498058", - "retrieved_timestamp": "1762652580.498058", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-3b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5342358276040905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4808896246368473 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2794561933534743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4041666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37608045212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-72b/1ae05e9f-d432-4e7f-a662-4b4a118333d9.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-72b/1ae05e9f-d432-4e7f-a662-4b4a118333d9.json deleted file mode 100644 index 3b4ca47cfad4071ba18a888375dad68621aed8bc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-72b/1ae05e9f-d432-4e7f-a662-4b4a118333d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-72b/1762652580.498325", - "retrieved_timestamp": "1762652580.498326", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-72b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-72b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.715535889218385 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7229589065788488 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5422960725075529 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39848993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4599166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.593500664893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-7b/23ec1efe-a9a1-41cb-9695-4be0ceb3c199.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-7b/23ec1efe-a9a1-41cb-9695-4be0ceb3c199.json deleted file mode 100644 index e8c91f5749777a17e17e7fdf5acf34a8b6192b43..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-7b/23ec1efe-a9a1-41cb-9695-4be0ceb3c199.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-7b/1762652580.498573", - "retrieved_timestamp": "1762652580.498574", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5-Qwen-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5-Qwen-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6237117514860571 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5543885046903589 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3814199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42909375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4468916223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/3f1ffcf0-10bb-46b2-ae30-3eb958e943a1.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/3f1ffcf0-10bb-46b2-ae30-3eb958e943a1.json deleted file mode 100644 index c4848a828a4711d68bb6594d6b1eeaa1d5faf78d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/3f1ffcf0-10bb-46b2-ae30-3eb958e943a1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/1762652580.498805", - "retrieved_timestamp": "1762652580.498805", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2595125378440316 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884043024656656 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667675 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39911458333333333 - } - }, - { 
- "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27194148936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/91240596-5842-4441-b976-01ed7545bd1f.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/91240596-5842-4441-b976-01ed7545bd1f.json deleted file mode 100644 index 15a8604dbad29d7740a8712913b38051c9c6910d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/91240596-5842-4441-b976-01ed7545bd1f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/1762652580.499037", - "retrieved_timestamp": "1762652580.499037", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2566401592219755 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39000839740376536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39911458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27410239361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.6-Qwen-14b/5842364a-2721-4882-90f3-97eba7c3b93a.json 
b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.6-Qwen-14b/5842364a-2721-4882-90f3-97eba7c3b93a.json deleted file mode 100644 index 31b859d81dc231b44377a459213243a7682e1e32..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.6-Qwen-14b/5842364a-2721-4882-90f3-97eba7c3b93a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.6-Qwen-14b/1762652580.499588", - "retrieved_timestamp": "1762652580.4995892", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.6-Qwen-14b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.6-Qwen-14b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8431550508207113 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6442096596344892 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4220625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49609375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/securin_Securin-LLM-V2.5-Qwen-1.5B/cbd0163f-fbea-4f40-a26b-a0508ec02061.json b/leaderboard_data/HFOpenLLMv2/alibaba/securin_Securin-LLM-V2.5-Qwen-1.5B/cbd0163f-fbea-4f40-a26b-a0508ec02061.json deleted file mode 100644 index cc3d9ae47c6587488a45966d3b1015855e7749c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/securin_Securin-LLM-V2.5-Qwen-1.5B/cbd0163f-fbea-4f40-a26b-a0508ec02061.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/securin_Securin-LLM-V2.5-Qwen-1.5B/1762652580.510926", - "retrieved_timestamp": "1762652580.5109272", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "securin/Securin-LLM-V2.5-Qwen-1.5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "securin/Securin-LLM-V2.5-Qwen-1.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1492030035860406 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3158416288115425 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3606354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16148603723404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.543 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sethuiyer_Qwen2.5-7B-Anvita/f2571e64-be03-4482-b5b4-d120444b0586.json b/leaderboard_data/HFOpenLLMv2/alibaba/sethuiyer_Qwen2.5-7B-Anvita/f2571e64-be03-4482-b5b4-d120444b0586.json deleted file mode 100644 index 48af2c9a36f4c74f74a3507ff21cb1851c0fcfdb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sethuiyer_Qwen2.5-7B-Anvita/f2571e64-be03-4482-b5b4-d120444b0586.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sethuiyer_Qwen2.5-7B-Anvita/1762652580.514066", - "retrieved_timestamp": "1762652580.514067", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sethuiyer/Qwen2.5-7B-Anvita", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sethuiyer/Qwen2.5-7B-Anvita" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6480416406246536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5465860266784314 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43365625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4165558510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/someon98_qwen-CoMa-0.5b/be4ee67a-59d7-4098-992e-5f75cd53cdbc.json b/leaderboard_data/HFOpenLLMv2/alibaba/someon98_qwen-CoMa-0.5b/be4ee67a-59d7-4098-992e-5f75cd53cdbc.json deleted file mode 100644 index 54b109ff5e9014f6ce7047c5457885b7da5e04b8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/someon98_qwen-CoMa-0.5b/be4ee67a-59d7-4098-992e-5f75cd53cdbc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/someon98_qwen-CoMa-0.5b/1762652580.518077", - "retrieved_timestamp": "1762652580.5180779", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "someon98/qwen-CoMa-0.5b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "someon98/qwen-CoMa-0.5b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22766371006706648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29533439538939815 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40457291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10987367021276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Lamarck-14B-v0.4-Qwenvergence/41393c10-c1e5-4ccd-bcb1-df5392cb8ec6.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Lamarck-14B-v0.4-Qwenvergence/41393c10-c1e5-4ccd-bcb1-df5392cb8ec6.json deleted file mode 100644 index 9b119388ed727cea61b75b7411fa1980c7644064..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Lamarck-14B-v0.4-Qwenvergence/41393c10-c1e5-4ccd-bcb1-df5392cb8ec6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.4-Qwenvergence/1762652580.5196202", - "retrieved_timestamp": "1762652580.5196211", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.4-Qwenvergence", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.4-Qwenvergence" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4906470387460826 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6535142192324058 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4846875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5406416223404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen-14B-ProseStock-v4/e68bc90b-1274-4e28-b280-65e6ceba53f8.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen-14B-ProseStock-v4/e68bc90b-1274-4e28-b280-65e6ceba53f8.json deleted file mode 100644 index 585967e2fe662ab8ade9483ea3e3ec944c1cc335..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen-14B-ProseStock-v4/e68bc90b-1274-4e28-b280-65e6ceba53f8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen-14B-ProseStock-v4/1762652580.522184", - "retrieved_timestamp": "1762652580.5221848", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwen-14B-ProseStock-v4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen-14B-ProseStock-v4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4942186731206532 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6498268976192769 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640483383685801 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49383333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5386469414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen-2.5-14B-Virmarckeoso/dc7af75a-f45a-449a-b6ba-cc033d7de79f.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen-2.5-14B-Virmarckeoso/dc7af75a-f45a-449a-b6ba-cc033d7de79f.json deleted file mode 100644 index ce8eec1e423bad4f36920f28f32e4530ad4810de..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen-2.5-14B-Virmarckeoso/dc7af75a-f45a-449a-b6ba-cc033d7de79f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen-2.5-14B-Virmarckeoso/1762652580.5224378", - "retrieved_timestamp": "1762652580.522439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwen-2.5-14B-Virmarckeoso", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen-2.5-14B-Virmarckeoso" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4813295389566351 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6569729950776678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564954682779456 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4793541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5377327127659575 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v2/5242491e-deb4-41ae-8d70-5b0d8ffb7bc7.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v2/5242491e-deb4-41ae-8d70-5b0d8ffb7bc7.json deleted file mode 100644 index d8056cc58b824ef117e6fc2b9b7cc1f071fb5b7b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v2/5242491e-deb4-41ae-8d70-5b0d8ffb7bc7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v2/1762652580.52286", - "retrieved_timestamp": "1762652580.522861", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4505301488938239 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6550336897572636 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3580060422960725 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48189583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5379820478723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-IF-Variant/9df5ab5a-16cf-478f-87f0-1b8717e1e330.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-IF-Variant/9df5ab5a-16cf-478f-87f0-1b8717e1e330.json deleted file mode 100644 index f5ee338698853f6fccdb1441b0c89e9ac9b13a45..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-IF-Variant/9df5ab5a-16cf-478f-87f0-1b8717e1e330.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-IF-Variant/1762652580.523307", - "retrieved_timestamp": "1762652580.523308", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6412973133507981 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5520788965536542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2545317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5319166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4588597074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-Prose01/dd84656a-3b61-4241-a2eb-a5f52ff58ed2.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-Prose01/dd84656a-3b61-4241-a2eb-a5f52ff58ed2.json deleted file mode 100644 index 5eed745fd33713a0b84db1dc6345c8d9d3f60a4b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-Prose01/dd84656a-3b61-4241-a2eb-a5f52ff58ed2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-Prose01/1762652580.523516", - "retrieved_timestamp": "1762652580.523516", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.6872343160591674 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6358769213927613 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3995468277945619 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48071875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5275099734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-model_stock/ba7b8cb4-608a-4bf0-b107-51e721f88dee.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-model_stock/ba7b8cb4-608a-4bf0-b107-51e721f88dee.json deleted file mode 100644 index 7140ca4c6d12feafee08bbb03283b4a7918bdc84..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-model_stock/ba7b8cb4-608a-4bf0-b107-51e721f88dee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-model_stock/1762652580.5237172", - "retrieved_timestamp": "1762652580.5237179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7161852772864887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6420915332649074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4244712990936556 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47811458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5315824468085106 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3/9e453ef2-bae1-4a06-8778-d9c0dfae33e8.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3/9e453ef2-bae1-4a06-8778-d9c0dfae33e8.json deleted file mode 100644 index 532b1aec036c7a59cfd88875d166aaecb80a3212..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3/9e453ef2-bae1-4a06-8778-d9c0dfae33e8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3/1762652580.52309", - "retrieved_timestamp": "1762652580.52309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7256523801291683 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.641460062329604 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4806875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5343251329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso/b3b73406-3b25-4a23-9e13-53fafdd66552.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso/b3b73406-3b25-4a23-9e13-53fafdd66552.json deleted file mode 100644 index 9fafa714074bd6255965e3ddd20b7bc233faa20a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso/b3b73406-3b25-4a23-9e13-53fafdd66552.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso/1762652580.522644", - "retrieved_timestamp": "1762652580.522645", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45742407922091166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6446348390056346 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4858645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5329122340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file 
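Each deleted file above is one record of the same schema-0.0.1 shape: model_info and source metadata plus a list of evaluation_results, one score per benchmark. For orientation, here is a minimal, illustrative sketch of how such a record can be flattened into a single tabular row suitable for a Parquet table; it is not necessarily how the repo's scripts/convert_to_parquet.py works, the flatten_record helper is hypothetical, and only the field names are taken from the records in this diff (the example values are from the Rombos-LLM-V2.5-Qwen-72b record above).

import pandas as pd  # writing Parquet additionally requires pyarrow

def flatten_record(data: dict) -> dict:
    """Flatten one schema-0.0.1 evaluation record into a flat row:
    one column per metadata field, one column per benchmark score."""
    row = {
        "model_id": data.get("model_info", {}).get("id"),
        "developer": data.get("model_info", {}).get("developer"),
        "leaderboard": data.get("evaluation_source", {}).get("evaluation_source_name"),
        "precision": data.get("additional_details", {}).get("precision"),
        "architecture": data.get("additional_details", {}).get("architecture"),
        "params_billions": data.get("additional_details", {}).get("params_billions"),
    }
    for res in data.get("evaluation_results", []):
        # e.g. row["IFEval"] = 0.715535889218385
        row[res.get("evaluation_name", "unknown")] = res.get("score_details", {}).get("score")
    return row

# Tiny self-contained example built from fields visible in this diff:
example = {
    "model_info": {"id": "rombodawg/Rombos-LLM-V2.5-Qwen-72b", "developer": "alibaba"},
    "evaluation_source": {"evaluation_source_name": "HF Open LLM v2"},
    "additional_details": {"precision": "bfloat16",
                           "architecture": "Qwen2ForCausalLM",
                           "params_billions": 72.706},
    "evaluation_results": [
        {"evaluation_name": "IFEval",
         "score_details": {"score": 0.715535889218385}},
    ],
}

df = pd.DataFrame([flatten_record(example)])
# df.to_parquet("leaderboard.parquet", index=False)

One row per record keeps the Parquet output wide and query-friendly (one column per benchmark), which is why the per-file JSONs deleted here are no longer needed in the repo once the dataset lives on HuggingFace.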
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Prose/dceb35c6-30bb-483c-aa62-8273b409311b.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Prose/dceb35c6-30bb-483c-aa62-8273b409311b.json deleted file mode 100644 index 1e1a42d358b2e38978b1c15256e253f6791b2c44..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Prose/dceb35c6-30bb-483c-aa62-8273b409311b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Prose/1762652580.524123", - "retrieved_timestamp": "1762652580.524123", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5347101246913745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5599089581177875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2892749244712991 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45017708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4525432180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Reason/100a253a-3409-4145-8a9d-0bf821e3ce91.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Reason/100a253a-3409-4145-8a9d-0bf821e3ce91.json deleted file mode 100644 index 4a291e1e6193c037520cb8586722369499cf4578..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Reason/100a253a-3409-4145-8a9d-0bf821e3ce91.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Reason/1762652580.5243258", - "retrieved_timestamp": "1762652580.5243268", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49172085621705963 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5498169530870823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2620845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4434166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4306848404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1/174b2a17-c4fa-4021-868b-9c23a99603c9.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1/174b2a17-c4fa-4021-868b-9c23a99603c9.json deleted file mode 100644 index 79eadfa22c5e2633ec88085882722464581d0e38..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1/174b2a17-c4fa-4021-868b-9c23a99603c9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-7B-Gordion-v0.1/1762652580.5239239", - "retrieved_timestamp": "1762652580.523925", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.748183708116686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5523808037550308 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29154078549848944 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40162499999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43001994680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentessential-14B-v1/3cce1e77-5dfc-44d2-b0c2-f7220d989e9d.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentessential-14B-v1/3cce1e77-5dfc-44d2-b0c2-f7220d989e9d.json deleted file mode 100644 index 54c8eedfca1daf17f64ca0fdd9be7b6c2306bea7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentessential-14B-v1/3cce1e77-5dfc-44d2-b0c2-f7220d989e9d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentessential-14B-v1/1762652580.524672", - "retrieved_timestamp": "1762652580.524674", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwentessential-14B-v1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentessential-14B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.6279083941719084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6545165968552056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070996978851964 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4872916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5381482712765957 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v013/8127e367-fbd2-475d-a4f0-b8895dec6741.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v013/8127e367-fbd2-475d-a4f0-b8895dec6741.json deleted file mode 100644 index 284844c2bc834b14552cf2af8a54ebcea6edaaf4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v013/8127e367-fbd2-475d-a4f0-b8895dec6741.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v013/1762652580.5250719", - "retrieved_timestamp": "1762652580.525074", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v013", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v013" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6711226213114536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6086634082040333 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.37084592145015105 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5154166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49908577127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v1/c68a024d-fa21-4584-bde5-42121e919af7.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v1/c68a024d-fa21-4584-bde5-42121e919af7.json deleted file mode 100644 index a78e2e058f5a72f775f6b1f5956a9dc611c58a36..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v1/c68a024d-fa21-4584-bde5-42121e919af7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v1/1762652580.5253482", - "retrieved_timestamp": "1762652580.5253491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5031616111916382 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6572572845221036 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36027190332326287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4780520833333333 - } - }, - 
{ - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5409740691489362 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v2/ce1feb87-4f78-4ff1-a548-b3409591166f.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v2/ce1feb87-4f78-4ff1-a548-b3409591166f.json deleted file mode 100644 index 2f39447b7c968220890ef8ed802b5c976102a4ce..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v2/ce1feb87-4f78-4ff1-a548-b3409591166f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v2/1762652580.525585", - "retrieved_timestamp": "1762652580.525586", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5378329499062487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6555355668062347 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47141666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5408909574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v3/96b75db5-4e23-4179-bbf7-801f35d31af7.json 
b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v3/96b75db5-4e23-4179-bbf7-801f35d31af7.json deleted file mode 100644 index 18ac34a8130fe9f2a8d77a4ecafcf4572b4d84a7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v3/96b75db5-4e23-4179-bbf7-801f35d31af7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v3/1762652580.525815", - "retrieved_timestamp": "1762652580.525816", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6157683834448153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6538645567116264 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48598958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5413065159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v5/16e0de9b-9717-4451-babc-8df8748c4efe.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v5/16e0de9b-9717-4451-babc-8df8748c4efe.json deleted file mode 100644 index 774ae77f2cd4aad5373c447a133282c01f3c64de..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v5/16e0de9b-9717-4451-babc-8df8748c4efe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v5/1762652580.5261161", - "retrieved_timestamp": "1762652580.526117", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.628557782240012 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.654985060704008 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34441087613293053 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4873854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5418051861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v6-Prose/8eecc1a5-d42e-423c-9155-daf66a414361.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v6-Prose/8eecc1a5-d42e-423c-9155-daf66a414361.json deleted file mode 100644 index 77fde0e804cd664ba199195a0b09e660691fe2b7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v6-Prose/8eecc1a5-d42e-423c-9155-daf66a414361.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v6-Prose/1762652580.52656", - "retrieved_timestamp": "1762652580.526561", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v6-Prose", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v6-Prose" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5642860942299764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6545112522796068 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37009063444108764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4912604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5392287234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v6/93e0bcb6-be72-4e9c-adbc-c8fce3240b0d.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v6/93e0bcb6-be72-4e9c-adbc-c8fce3240b0d.json deleted file mode 100644 index 1ac01b8c10462a47ba5e9a00240e2dd0c12ecd9c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v6/93e0bcb6-be72-4e9c-adbc-c8fce3240b0d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v6/1762652580.526352", - "retrieved_timestamp": "1762652580.526353", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v6", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6304062110755019 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6544517420216159 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36027190332326287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48995833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399767287234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v7/6aaa1633-f780-42d4-b43e-5a4d31cf7aae.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v7/6aaa1633-f780-42d4-b43e-5a4d31cf7aae.json deleted file mode 100644 index 569ccae138329a26d9ddf8c885d0af543d058c81..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v7/6aaa1633-f780-42d4-b43e-5a4d31cf7aae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v7/1762652580.526774", - "retrieved_timestamp": "1762652580.526774", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v7", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6109223526908603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6551430222697051 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48198958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5409740691489362 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v8/6be09829-08e5-4d45-a091-5451f6c74d51.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v8/6be09829-08e5-4d45-a091-5451f6c74d51.json deleted file mode 100644 index c977a33245c753abdf4fb5c1ec7fea8983e85007..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v8/6be09829-08e5-4d45-a091-5451f6c74d51.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v8/1762652580.526987", - "retrieved_timestamp": "1762652580.526987", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v8", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5411552458587658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6534258495008117 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39123867069486407 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48732291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5412234042553191 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file 
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v9/cea3e14d-a43d-4e32-b8fc-d8ae995190d8.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v9/cea3e14d-a43d-4e32-b8fc-d8ae995190d8.json deleted file mode 100644 index 26d3b46e0e720be409ccbd4c1fc28a7a3b6ed895..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v9/cea3e14d-a43d-4e32-b8fc-d8ae995190d8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v9/1762652580.5271978", - "retrieved_timestamp": "1762652580.527199", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwentinuum-14B-v9", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwentinuum-14B-v9" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5107304175144174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6580257842849174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34818731117824775 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47811458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5421376329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-qv256/f06fc349-e84e-4ec7-a9c9-8819896c2beb.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-qv256/f06fc349-e84e-4ec7-a9c9-8819896c2beb.json deleted file mode 100644 index 7521b02e6952c99769399b42308bad855670fa56..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-qv256/f06fc349-e84e-4ec7-a9c9-8819896c2beb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/sometimesanotion_Qwenvergence-14B-qv256/1762652580.52741", - "retrieved_timestamp": "1762652580.527411", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-qv256", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-qv256" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7006232352380573 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6312084721949004 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38972809667673713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49259375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5177859042553191 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v0.6-004-model_stock/86591e86-5bfb-4e8e-b910-bf6b5011562c.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v0.6-004-model_stock/86591e86-5bfb-4e8e-b910-bf6b5011562c.json deleted file mode 100644 index e85b3e090aa8d62c8e432cfb09ab18cc25efafe7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v0.6-004-model_stock/86591e86-5bfb-4e8e-b910-bf6b5011562c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v0.6-004-model_stock/1762652580.5276191", - "retrieved_timestamp": "1762652580.52762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6859854076073706 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6249338707540049 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4093655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519281914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v10/f2b35397-f539-4129-8e1f-f9dae9c9431b.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v10/f2b35397-f539-4129-8e1f-f9dae9c9431b.json deleted file mode 100644 index 4fac89f06cf5f50fde019d3a0254dd04a097daa1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v10/f2b35397-f539-4129-8e1f-f9dae9c9431b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v10/1762652580.5278451", - "retrieved_timestamp": "1762652580.5278451", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v10", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v10" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6756938257157675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6316425399409628 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4788519637462236 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49913541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.523936170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v11/50ae9dc0-efcc-43cb-8704-6dfb9270656a.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v11/50ae9dc0-efcc-43cb-8704-6dfb9270656a.json deleted file mode 100644 index 2961ff5b9b295d1529cfefda0f93b0c29535bc7e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v11/50ae9dc0-efcc-43cb-8704-6dfb9270656a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v11/1762652580.528142", - "retrieved_timestamp": "1762652580.5281432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v11", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v11" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7192327468893647 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6367548394062034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4645015105740181 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724832214765101 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4754479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5327460106382979 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v12-Prose-DS/a6c5b80d-e685-405a-8444-1be1ed763d2e.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v12-Prose-DS/a6c5b80d-e685-405a-8444-1be1ed763d2e.json deleted file mode 100644 index e0ff01d64cfc2fa0a7711c1fe7d40f844c37d619..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v12-Prose-DS/a6c5b80d-e685-405a-8444-1be1ed763d2e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v12-Prose-DS/1762652580.52859", - "retrieved_timestamp": "1762652580.5285912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v12-Prose-DS", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v12-Prose-DS" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6173419859306639 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6506726813719318 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43051359516616317 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5150729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5369015957446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v12-Prose/052e63b2-028b-4a4a-ae2b-51514e982239.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v12-Prose/052e63b2-028b-4a4a-ae2b-51514e982239.json deleted file mode 100644 index 13ae2aea4ce5f52e83a6c82da67d24d98a9774ef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v12-Prose/052e63b2-028b-4a4a-ae2b-51514e982239.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v12-Prose/1762652580.52837", - "retrieved_timestamp": "1762652580.5283709", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v12-Prose", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v12-Prose" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5412051135431766 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6504247508173936 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49913541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5381482712765957 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v13-Prose-DS/f205507c-48ef-4a40-a0e8-39f5f7bf2cdb.json 
b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v13-Prose-DS/f205507c-48ef-4a40-a0e8-39f5f7bf2cdb.json deleted file mode 100644 index e2bf3265f18b29c26c958aa52ac8c5f9dd3a9008..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v13-Prose-DS/f205507c-48ef-4a40-a0e8-39f5f7bf2cdb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v13-Prose-DS/1762652580.528805", - "retrieved_timestamp": "1762652580.528806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v13-Prose-DS", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v13-Prose-DS" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.717808747456748 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6405077084802886 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859516616314199 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49265625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.534906914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v15-Prose-MS/a9434630-a7cd-4dc1-b542-e76402344166.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v15-Prose-MS/a9434630-a7cd-4dc1-b542-e76402344166.json deleted file mode 100644 index 37511333111219c653f0b43410539d285a71d786..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v15-Prose-MS/a9434630-a7cd-4dc1-b542-e76402344166.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v15-Prose-MS/1762652580.529013", - "retrieved_timestamp": "1762652580.529014", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v15-Prose-MS", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v15-Prose-MS" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5032114788760489 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6550130348108012 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3632930513595166 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4912916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.539311835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v2-Prose/f639d7e3-ffb9-4dc5-ab20-993522afa5b4.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v2-Prose/f639d7e3-ffb9-4dc5-ab20-993522afa5b4.json deleted file mode 100644 index 933c88b5d375b98488b44a9d1ac7ba10c9298848..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v2-Prose/f639d7e3-ffb9-4dc5-ab20-993522afa5b4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v2-Prose/1762652580.529223", - "retrieved_timestamp": "1762652580.529224", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"sometimesanotion/Qwenvergence-14B-v2-Prose", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v2-Prose" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47048830436574957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6518830473518972 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3557401812688822 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49259375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5371509308510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Prose/37c4d6b3-9964-45d3-a6ed-8b84229ed304.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Prose/37c4d6b3-9964-45d3-a6ed-8b84229ed304.json deleted file mode 100644 index de26eea967683d9d6ac6043a6887dbb81ec8c45c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Prose/37c4d6b3-9964-45d3-a6ed-8b84229ed304.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3-Prose/1762652580.5297742", - "retrieved_timestamp": "1762652580.5297751", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v3-Prose", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v3-Prose" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49177072390147036 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6512913170949324 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49389583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5369847074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Reason/50c37538-a425-4b30-a9e0-9a60f6b2492f.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Reason/50c37538-a425-4b30-a9e0-9a60f6b2492f.json deleted file mode 100644 index 205eb17aaa24420106abdaf9710b7a75e0108055..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Reason/50c37538-a425-4b30-a9e0-9a60f6b2492f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3-Reason/1762652580.530208", - "retrieved_timestamp": "1762652580.530208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v3-Reason", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v3-Reason" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5278161943642867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6557437566824342 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3119335347432024 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47541666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5396442819148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Reason/58ac7b57-e498-4de0-95aa-475c9c56aaf6.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Reason/58ac7b57-e498-4de0-95aa-475c9c56aaf6.json deleted file mode 100644 index 89fb71c5be753ab5a728057fe6c63317f5c08d34..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Reason/58ac7b57-e498-4de0-95aa-475c9c56aaf6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3-Reason/1762652580.530001", - "retrieved_timestamp": "1762652580.530001", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v3-Reason", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v3-Reason" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366837768232734 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6561283957466177 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3580060422960725 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47402083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5394780585106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3/6cefa467-dae0-4b8b-bd5c-3343f1bfe111.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3/6cefa467-dae0-4b8b-bd5c-3343f1bfe111.json deleted file mode 100644 index 8f91d2249c43f62e98aa682072ef0cbae3f6d8ea..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3/6cefa467-dae0-4b8b-bd5c-3343f1bfe111.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3/1762652580.529505", - "retrieved_timestamp": "1762652580.529512", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504410519643435 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.654823836148701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3693353474320242 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48859375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5386469414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v6-Prose-model_stock/7f57b41f-d8e8-46a0-ad1f-2638e287bce7.json 
b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v6-Prose-model_stock/7f57b41f-d8e8-46a0-ad1f-2638e287bce7.json deleted file mode 100644 index 4ef51a35bb47dd5ef0d1983a72698f99dc88900c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v6-Prose-model_stock/7f57b41f-d8e8-46a0-ad1f-2638e287bce7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v6-Prose-model_stock/1762652580.530609", - "retrieved_timestamp": "1762652580.5306098", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48110458029140457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6530441861690175 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36027190332326287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48989583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5387300531914894 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v6-Prose/fa88bc37-eb6b-4d69-8983-7a489ab09665.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v6-Prose/fa88bc37-eb6b-4d69-8983-7a489ab09665.json deleted file mode 100644 index 93e2861fbb371525ade40beb685af124b5fa4019..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v6-Prose/fa88bc37-eb6b-4d69-8983-7a489ab09665.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v6-Prose/1762652580.530398", - "retrieved_timestamp": "1762652580.530399", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v6-Prose", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v6-Prose" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5990073006289978 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6543750230807198 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564954682779456 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48865625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370678191489362 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v8/9332e745-f594-40a9-af22-98709efc179d.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v8/9332e745-f594-40a9-af22-98709efc179d.json deleted file mode 100644 index 777ff7dce5453d29e3da608467010102d97274ad..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v8/9332e745-f594-40a9-af22-98709efc179d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v8/1762652580.530813", - "retrieved_timestamp": "1762652580.530813", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v8", - "developer": "alibaba", - 
"inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5913387589373973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6522455361956444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40483383685800606 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47678125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.543467420212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v9/65c35557-ec37-49c3-b7f6-11ce837500f0.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v9/65c35557-ec37-49c3-b7f6-11ce837500f0.json deleted file mode 100644 index 6d6ac2297ab0663a3d7b94c10147010ccf276076..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v9/65c35557-ec37-49c3-b7f6-11ce837500f0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v9/1762652580.531015", - "retrieved_timestamp": "1762652580.5310159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Qwenvergence-14B-v9", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sometimesanotion/Qwenvergence-14B-v9" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6598070896332842 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.6165623747365094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141145833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110538563829787 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwenftmodel/aece90fe-f0eb-4c34-afd0-7a4fc36dc385.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwenftmodel/aece90fe-f0eb-4c34-afd0-7a4fc36dc385.json deleted file mode 100644 index 257279a6c185454baf3ef5d83990580d30b477dc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwenftmodel/aece90fe-f0eb-4c34-afd0-7a4fc36dc385.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_Qwenftmodel/1762652580.5454028", - "retrieved_timestamp": "1762652580.545404", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/Qwenftmodel", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/Qwenftmodel" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17290899258412123 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38226970256668574 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36171875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23387632978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwenmplus/fc41cf78-6547-4fe6-83aa-ef5edd99a392.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwenmplus/fc41cf78-6547-4fe6-83aa-ef5edd99a392.json deleted file mode 100644 index d9b4c88e761be8c1b23d53c53d4efd560f548ca5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwenmplus/fc41cf78-6547-4fe6-83aa-ef5edd99a392.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_Qwenmplus/1762652580.5456882", - "retrieved_timestamp": "1762652580.545689", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/Qwenmplus", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/Qwenmplus" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20403307668098425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3675511408391697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38283333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19921875 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.543 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwensci/57a9ff0c-795f-45c4-b0c7-ad0c7400c88d.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwensci/57a9ff0c-795f-45c4-b0c7-ad0c7400c88d.json deleted file mode 100644 index 2327f4066088579f83a035720be75797d08dda3a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwensci/57a9ff0c-795f-45c4-b0c7-ad0c7400c88d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_Qwensci/1762652580.545888", - "retrieved_timestamp": "1762652580.5458891", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/Qwensci", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/Qwensci" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17398281005509825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3281870591856875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3608854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12599734042553193 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.543 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen/7c73720a-03d8-4d90-9557-cd579c7c3e86.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen/7c73720a-03d8-4d90-9557-cd579c7c3e86.json deleted file mode 100644 index 7f96fc0af2bb90cc4c43d4b1d5f0b52997cc37a1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen/7c73720a-03d8-4d90-9557-cd579c7c3e86.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_bbhqwen/1762652580.546088", - "retrieved_timestamp": "1762652580.546089", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/bbhqwen", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/bbhqwen" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18085236062536292 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3388245916050106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43523958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16165226063829788 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen2/b4dbcb3f-11dd-4bce-9d45-869ae7c8f9b1.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen2/b4dbcb3f-11dd-4bce-9d45-869ae7c8f9b1.json deleted file mode 100644 index f2b27274d2bc49666d521b0aad0e7ebd62a5ad77..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen2/b4dbcb3f-11dd-4bce-9d45-869ae7c8f9b1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_bbhqwen2/1762652580.546288", - "retrieved_timestamp": "1762652580.546289", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/bbhqwen2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/bbhqwen2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15329991090307052 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30663248168563745 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44305208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1149434840425532 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen3/b9dae1c0-8088-4ffb-9e91-0f6579b3147e.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen3/b9dae1c0-8088-4ffb-9e91-0f6579b3147e.json deleted file mode 100644 index 16e47c18d7ead9777b047cc66686de6cf4a769fa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen3/b9dae1c0-8088-4ffb-9e91-0f6579b3147e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_bbhqwen3/1762652580.546491", - "retrieved_timestamp": "1762652580.546491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/bbhqwen3", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/bbhqwen3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1942911474886634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2950842029929075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3796145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11660571808510638 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen4/336dbfac-133a-46c8-87c9-40f1ad12a714.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen4/336dbfac-133a-46c8-87c9-40f1ad12a714.json deleted file mode 100644 index 5745c3da97512b0c39539b648353f38fcb47f5aa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen4/336dbfac-133a-46c8-87c9-40f1ad12a714.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_bbhqwen4/1762652580.546697", - "retrieved_timestamp": "1762652580.546698", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/bbhqwen4", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/bbhqwen4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14485675784695717 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3199395559502713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4028958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15093085106382978 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - 
"params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen5/4b528bc8-e94a-4437-8c1c-bcd823bf5f45.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen5/4b528bc8-e94a-4437-8c1c-bcd823bf5f45.json deleted file mode 100644 index 91f789e2b21587697af42d5e03a06cf7c250385d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen5/4b528bc8-e94a-4437-8c1c-bcd823bf5f45.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_bbhqwen5/1762652580.546902", - "retrieved_timestamp": "1762652580.5469031", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/bbhqwen5", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/bbhqwen5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1521507378200951 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29130964476405813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4019375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11311502659574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen6/f585e5fe-c3b5-4134-97ed-67b57d74adb8.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen6/f585e5fe-c3b5-4134-97ed-67b57d74adb8.json deleted file mode 100644 index 56fe07f2459eabce439249902ec4ad8fcb25083f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen6/f585e5fe-c3b5-4134-97ed-67b57d74adb8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_bbhqwen6/1762652580.547101", - "retrieved_timestamp": "1762652580.547102", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/bbhqwen6", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "sumink/bbhqwen6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18929551368147626 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2782242419852629 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11527593085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/synergetic_FrankenQwen2.5-14B/5f69b85b-d66c-400b-8d40-58b96233ec3c.json b/leaderboard_data/HFOpenLLMv2/alibaba/synergetic_FrankenQwen2.5-14B/5f69b85b-d66c-400b-8d40-58b96233ec3c.json deleted file mode 100644 index 80f877e727d5e4db1c5931960d7f515209baefca..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/synergetic_FrankenQwen2.5-14B/5f69b85b-d66c-400b-8d40-58b96233ec3c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/synergetic_FrankenQwen2.5-14B/1762652580.5505831", - "retrieved_timestamp": "1762652580.550584", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "synergetic/FrankenQwen2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "synergetic/FrankenQwen2.5-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1869472998311148 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6047748435655343 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43816489361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 16.972 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-14b-tensopolis-v1/a3ff3d30-5dec-4ec3-87b9-004d570b005a.json b/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-14b-tensopolis-v1/a3ff3d30-5dec-4ec3-87b9-004d570b005a.json deleted file mode 100644 index 65453582a854add385c6912bb159aa01d0062ccb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-14b-tensopolis-v1/a3ff3d30-5dec-4ec3-87b9-004d570b005a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-14b-tensopolis-v1/1762652580.556658", - "retrieved_timestamp": "1762652580.556659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tensopolis/qwen2.5-14b-tensopolis-v1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "tensopolis/qwen2.5-14b-tensopolis-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7990166092634211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6363595324538928 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294561933534743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41933333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49110704787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-3b-or1-tensopolis/b79e1f6d-698d-4bde-b35f-3f31e09c9d6a.json b/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-3b-or1-tensopolis/b79e1f6d-698d-4bde-b35f-3f31e09c9d6a.json deleted file mode 100644 index 44721a160c3716893bb047a0d4127c77c6d1cc8c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-3b-or1-tensopolis/b79e1f6d-698d-4bde-b35f-3f31e09c9d6a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-3b-or1-tensopolis/1762652580.556941", - "retrieved_timestamp": "1762652580.556942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tensopolis/qwen2.5-3b-or1-tensopolis", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "tensopolis/qwen2.5-3b-or1-tensopolis" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35400958346077294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44214988544006467 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.37492708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3197307180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-7b-tensopolis-v1/20854e9f-ba11-492c-8d81-08e13ca1ec35.json b/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-7b-tensopolis-v1/20854e9f-ba11-492c-8d81-08e13ca1ec35.json deleted file mode 100644 index 0c2625ca5f84dad20dd4d25980ea9c1b0f85c307..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-7b-tensopolis-v1/20854e9f-ba11-492c-8d81-08e13ca1ec35.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-7b-tensopolis-v1/1762652580.5571609", - "retrieved_timestamp": "1762652580.557162", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tensopolis/qwen2.5-7b-tensopolis-v1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "tensopolis/qwen2.5-7b-tensopolis-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7660939640154789 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5378740884658956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4561933534743202 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42686170212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-7b-tensopolis-v2/e7862d19-b3d4-47f6-b174-b53015229a42.json b/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-7b-tensopolis-v2/e7862d19-b3d4-47f6-b174-b53015229a42.json deleted file mode 100644 index aa6fb06cf6e0b27d8fb91476e77d062103f5f3e8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-7b-tensopolis-v2/e7862d19-b3d4-47f6-b174-b53015229a42.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-7b-tensopolis-v2/1762652580.5574138", - "retrieved_timestamp": "1762652580.5574138", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tensopolis/qwen2.5-7b-tensopolis-v2", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "tensopolis/qwen2.5-7b-tensopolis-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.752105524452896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414622323974015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42463541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42428523936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/theprint_ReWiz-Qwen-2.5-14B/9a4e6a55-e39e-4da6-b4bb-670cbd75d5c6.json b/leaderboard_data/HFOpenLLMv2/alibaba/theprint_ReWiz-Qwen-2.5-14B/9a4e6a55-e39e-4da6-b4bb-670cbd75d5c6.json deleted file mode 100644 index 36a763cdb4ef94100b0b7152a42ff50814ccb9ba..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/theprint_ReWiz-Qwen-2.5-14B/9a4e6a55-e39e-4da6-b4bb-670cbd75d5c6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/theprint_ReWiz-Qwen-2.5-14B/1762652580.563489", - "retrieved_timestamp": "1762652580.5634902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/ReWiz-Qwen-2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "theprint/ReWiz-Qwen-2.5-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27854647889821227 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6179492756426455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29229607250755285 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45389583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5092253989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 16.743 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-1_5B/626a924c-618b-4047-bed3-9ff67b6e47ae.json b/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-1_5B/626a924c-618b-4047-bed3-9ff67b6e47ae.json deleted file mode 100644 index 0f910635fe4ee274c1f0a47760dbb8007f92ce72..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-1_5B/626a924c-618b-4047-bed3-9ff67b6e47ae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2-1_5B/1762652580.565519", - "retrieved_timestamp": "1762652580.565519", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "thomas-yanxin/XinYuan-Qwen2-1_5B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": 
"thomas-yanxin/XinYuan-Qwen2-1_5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2985556102253133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3635491993150823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36339583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23570478723404256 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-7B-0917/0fac57c3-7bea-48fc-bb38-b679ab835d91.json b/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-7B-0917/0fac57c3-7bea-48fc-bb38-b679ab835d91.json deleted file mode 100644 index d52ef15b5fd9c6aaac6e28d30a40a33dc379b4f0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-7B-0917/0fac57c3-7bea-48fc-bb38-b679ab835d91.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2-7B-0917/1762652580.56599", - "retrieved_timestamp": "1762652580.565991", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "thomas-yanxin/XinYuan-Qwen2-7B-0917", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "thomas-yanxin/XinYuan-Qwen2-7B-0917" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37191983935956596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.5169215573786009 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19788519637462235 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4401041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4245345744680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-7B/5e0690cd-21e6-4778-8af9-7d9f623f5f52.json b/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-7B/5e0690cd-21e6-4778-8af9-7d9f623f5f52.json deleted file mode 100644 index 6cc03251a26db181f7efeee45c3ef48c8d9c6974..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-7B/5e0690cd-21e6-4778-8af9-7d9f623f5f52.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2-7B/1762652580.565779", - "retrieved_timestamp": "1762652580.56578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "thomas-yanxin/XinYuan-Qwen2-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "thomas-yanxin/XinYuan-Qwen2-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44376033369238066 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4936629157238895 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40581249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3924534574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2.5-7B-0917/6dc1a4e7-6ce6-4337-a242-420fe4139538.json b/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2.5-7B-0917/6dc1a4e7-6ce6-4337-a242-420fe4139538.json deleted file mode 100644 index 7e83a64a6b8ed41138b1d401c92887de5532c592..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2.5-7B-0917/6dc1a4e7-6ce6-4337-a242-420fe4139538.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2.5-7B-0917/1762652580.5662022", - "retrieved_timestamp": "1762652580.5662029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "thomas-yanxin/XinYuan-Qwen2.5-7B-0917", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "thomas-yanxin/XinYuan-Qwen2.5-7B-0917" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35770644113175265 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5184106116987492 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3675520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38821476063829785 - } - } - ], - "additional_details": { - "precision": "float16", - 
"architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/v000000_Qwen2.5-14B-Gutenberg-1e-Delta/676745af-1929-4875-9a78-d57354883d75.json b/leaderboard_data/HFOpenLLMv2/alibaba/v000000_Qwen2.5-14B-Gutenberg-1e-Delta/676745af-1929-4875-9a78-d57354883d75.json deleted file mode 100644 index 904f2bd43c0b2c5a2f1e6ba9dccd1370f6fbd163..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/v000000_Qwen2.5-14B-Gutenberg-1e-Delta/676745af-1929-4875-9a78-d57354883d75.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/v000000_Qwen2.5-14B-Gutenberg-1e-Delta/1762652580.584905", - "retrieved_timestamp": "1762652580.584906", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "v000000/Qwen2.5-14B-Gutenberg-1e-Delta", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "v000000/Qwen2.5-14B-Gutenberg-1e-Delta" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8045120280854798 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.639849930188539 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5264350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40730208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4930186170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/v000000_Qwen2.5-Lumen-14B/7b134cb3-7794-4984-9240-b889e2a3b6b4.json b/leaderboard_data/HFOpenLLMv2/alibaba/v000000_Qwen2.5-Lumen-14B/7b134cb3-7794-4984-9240-b889e2a3b6b4.json deleted file mode 100644 index 36d14a54efe3fec3736754f6320b80476cbbd57d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/v000000_Qwen2.5-Lumen-14B/7b134cb3-7794-4984-9240-b889e2a3b6b4.json +++ 
/dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/v000000_Qwen2.5-Lumen-14B/1762652580.585356", - "retrieved_timestamp": "1762652580.585357", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "v000000/Qwen2.5-Lumen-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "v000000/Qwen2.5-Lumen-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8063604569209697 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6390809511149668 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41139583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49027593085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/vonjack_Qwen2.5-Coder-0.5B-Merged/76b52fe1-c232-47d9-8052-077a945364cd.json b/leaderboard_data/HFOpenLLMv2/alibaba/vonjack_Qwen2.5-Coder-0.5B-Merged/76b52fe1-c232-47d9-8052-077a945364cd.json deleted file mode 100644 index bdc68cf09663160eee3921641c80bb65db3101fa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/vonjack_Qwen2.5-Coder-0.5B-Merged/76b52fe1-c232-47d9-8052-077a945364cd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vonjack_Qwen2.5-Coder-0.5B-Merged/1762652580.5902011", - "retrieved_timestamp": "1762652580.590202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vonjack/Qwen2.5-Coder-0.5B-Merged", - 
"developer": "alibaba", - "inference_platform": "unknown", - "id": "vonjack/Qwen2.5-Coder-0.5B-Merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30997087727230416 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3076017752057237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33034375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12017952127659574 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/wave-on-discord_qwent-7b/1dc524b8-18d6-4bc0-9146-713ef8abd983.json b/leaderboard_data/HFOpenLLMv2/alibaba/wave-on-discord_qwent-7b/1dc524b8-18d6-4bc0-9146-713ef8abd983.json deleted file mode 100644 index e0a7281b5462c7f6e082a8f4eacc3b75a44d0932..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/wave-on-discord_qwent-7b/1dc524b8-18d6-4bc0-9146-713ef8abd983.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/wave-on-discord_qwent-7b/1762652580.592784", - "retrieved_timestamp": "1762652580.592785", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "wave-on-discord/qwent-7b", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "wave-on-discord/qwent-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20148539209297997 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4228103286118343 - 
} - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38165625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16032247340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/win10_EVA-Norns-Qwen2.5-v0.1/5b8044df-ce6a-4a5e-9aed-d657188fa114.json b/leaderboard_data/HFOpenLLMv2/alibaba/win10_EVA-Norns-Qwen2.5-v0.1/5b8044df-ce6a-4a5e-9aed-d657188fa114.json deleted file mode 100644 index 7f7e56a32ac252a6e18427673d9f959b1470153d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/win10_EVA-Norns-Qwen2.5-v0.1/5b8044df-ce6a-4a5e-9aed-d657188fa114.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/win10_EVA-Norns-Qwen2.5-v0.1/1762652580.594388", - "retrieved_timestamp": "1762652580.594388", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "win10/EVA-Norns-Qwen2.5-v0.1", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "win10/EVA-Norns-Qwen2.5-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6219630580193884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.507240838017382 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26132930513595165 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40451041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3425033244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/win10_Norns-Qwen2.5-12B/4ff2e991-ee62-467e-9fec-cdf334ca7fca.json b/leaderboard_data/HFOpenLLMv2/alibaba/win10_Norns-Qwen2.5-12B/4ff2e991-ee62-467e-9fec-cdf334ca7fca.json deleted file mode 100644 index f3cfda531f02b3de8f2b446ecd2f0b19bda12e37..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/win10_Norns-Qwen2.5-12B/4ff2e991-ee62-467e-9fec-cdf334ca7fca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/win10_Norns-Qwen2.5-12B/1762652580.594881", - "retrieved_timestamp": "1762652580.594882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "win10/Norns-Qwen2.5-12B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "win10/Norns-Qwen2.5-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48969733640074997 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46189201103923744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3554895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2660405585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 12.277 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/alibaba/win10_Norns-Qwen2.5-7B/2451252e-2cf6-4394-9009-544630696c75.json b/leaderboard_data/HFOpenLLMv2/alibaba/win10_Norns-Qwen2.5-7B/2451252e-2cf6-4394-9009-544630696c75.json deleted file mode 100644 index 3c29c9340c529ed110a7efe45e8a7723a28dc757..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/win10_Norns-Qwen2.5-7B/2451252e-2cf6-4394-9009-544630696c75.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/win10_Norns-Qwen2.5-7B/1762652580.5950878", - "retrieved_timestamp": "1762652580.595089", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "win10/Norns-Qwen2.5-7B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "win10/Norns-Qwen2.5-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6122211288270678 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5072887832228614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2628398791540785 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40847916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34133976063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/x0000001_Deepseek-Lumen-R1-Qwen2.5-14B/9d6eb7bc-965e-4de8-bccf-0590ad55ce6d.json b/leaderboard_data/HFOpenLLMv2/alibaba/x0000001_Deepseek-Lumen-R1-Qwen2.5-14B/9d6eb7bc-965e-4de8-bccf-0590ad55ce6d.json deleted file mode 100644 index f5e00a60b156d32cc754db477e870d4fa257f2d2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alibaba/x0000001_Deepseek-Lumen-R1-Qwen2.5-14B/9d6eb7bc-965e-4de8-bccf-0590ad55ce6d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/x0000001_Deepseek-Lumen-R1-Qwen2.5-14B/1762652580.596637", - 
"retrieved_timestamp": "1762652580.596638", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "x0000001/Deepseek-Lumen-R1-Qwen2.5-14B", - "developer": "alibaba", - "inference_platform": "unknown", - "id": "x0000001/Deepseek-Lumen-R1-Qwen2.5-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4436107306391486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45690468424066283 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27794561933534745 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47396875000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4379155585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-70B-DPO/b790e9c5-2412-4aa0-a975-37b8662a82cf.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-70B-DPO/b790e9c5-2412-4aa0-a975-37b8662a82cf.json deleted file mode 100644 index 570d57724e141158892cb45157e9bb6fcc19afb5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-70B-DPO/b790e9c5-2412-4aa0-a975-37b8662a82cf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B-DPO/1762652579.9821", - "retrieved_timestamp": "1762652579.982101", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-70B-DPO", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-70B-DPO" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8281925291559729 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6146203626958501 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44939577039274925 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4922604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4632646276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-70B-SFT/6921281e-5756-4f0d-a37c-3b05ff6b2703.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-70B-SFT/6921281e-5756-4f0d-a37c-3b05ff6b2703.json deleted file mode 100644 index 7905f87c57c9e677e1ee864a429c7d291c819401..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-70B-SFT/6921281e-5756-4f0d-a37c-3b05ff6b2703.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B-SFT/1762652579.982346", - "retrieved_timestamp": "1762652579.982346", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-70B-SFT", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-70B-SFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8050616807847621 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5951437800580934 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33157099697885195 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5026145833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46243351063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-8B-DPO/81bd1edf-be5b-4ae6-a2cc-723aaa040eb9.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-8B-DPO/81bd1edf-be5b-4ae6-a2cc-723aaa040eb9.json deleted file mode 100644 index 6d43708bec77d22e403250d49a632b9e20a10751..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-8B-DPO/81bd1edf-be5b-4ae6-a2cc-723aaa040eb9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B-DPO/1762652579.9829278", - "retrieved_timestamp": "1762652579.982929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-8B-DPO", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-8B-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8029384255996312 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4079428557044153 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.236404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41613541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2898105053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-8B-SFT/35674acb-a68c-4ac1-9aac-ac9cb44801e6.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-8B-SFT/35674acb-a68c-4ac1-9aac-ac9cb44801e6.json deleted file mode 100644 index 736b53a5e051744117d64774cee528a8556d91a3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-8B-SFT/35674acb-a68c-4ac1-9aac-ac9cb44801e6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B-SFT/1762652579.983397", - "retrieved_timestamp": "1762652579.983398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-8B-SFT", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-8B-SFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7403400754442657 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871863270501647 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4267708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28116688829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-1.7-7B-hf/5d7caae7-0242-4a5d-b3be-c677b958d130.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-1.7-7B-hf/5d7caae7-0242-4a5d-b3be-c677b958d130.json deleted file mode 100644 index 167c2b63d0e1e6452c2ad64721cdad8468c0eb7b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-1.7-7B-hf/5d7caae7-0242-4a5d-b3be-c677b958d130.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_OLMo-1.7-7B-hf/1762652579.9836009", - "retrieved_timestamp": "1762652579.9836018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/OLMo-1.7-7B-hf", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMo-1.7-7B-hf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1568970332052288 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3013695911207614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34748958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11236702127659574 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Unknown", - "params_billions": 0.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-1B-hf/d13f5416-1d95-431b-8f01-b969066ec960.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-1B-hf/d13f5416-1d95-431b-8f01-b969066ec960.json deleted file mode 100644 index b986f095113ea0c317a3e4fac048bbea02896b9f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-1B-hf/d13f5416-1d95-431b-8f01-b969066ec960.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_OLMo-1B-hf/1762652579.983823", - "retrieved_timestamp": "1762652579.983823", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/OLMo-1B-hf", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMo-1B-hf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21819660722438686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30519468988429327 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40978125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11735372340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "OlmoForCausalLM", - "params_billions": 1.177 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-2-1124-7B-Instruct/17df660f-6a91-476f-a7e8-7169eef1c24d.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-2-1124-7B-Instruct/17df660f-6a91-476f-a7e8-7169eef1c24d.json deleted file mode 100644 index ccfccbab9d7a51f8aa2fc5d0864a02e6bf375f48..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-2-1124-7B-Instruct/17df660f-6a91-476f-a7e8-7169eef1c24d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_OLMo-2-1124-7B-Instruct/1762652579.9840362", - "retrieved_timestamp": "1762652579.9840372", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/OLMo-2-1124-7B-Instruct", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMo-2-1124-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7244034716773715 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40223602474417786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1487915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35083333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2672041223404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Olmo2ForCausalLM", - "params_billions": 7.299 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-7B-Instruct-hf/7ff78ffd-c934-4a17-b30d-2d8267f3e25a.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-7B-Instruct-hf/7ff78ffd-c934-4a17-b30d-2d8267f3e25a.json deleted file mode 100644 index a9e86c4b2d282e42473de60f2d04353f76049634..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-7B-Instruct-hf/7ff78ffd-c934-4a17-b30d-2d8267f3e25a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_OLMo-7B-Instruct-hf/1762652579.98445", - "retrieved_timestamp": "1762652579.984452", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/OLMo-7B-Instruct-hf", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMo-7B-Instruct-hf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3472652561869174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3706469866662716 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37647916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17852393617021275 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "OlmoForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-7B-hf/6308f97d-aecd-467a-91f0-5a1650ccc22a.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-7B-hf/6308f97d-aecd-467a-91f0-5a1650ccc22a.json deleted file mode 100644 index 3e9e0c916d208d022edc5b521766d72f451df1ba..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-7B-hf/6308f97d-aecd-467a-91f0-5a1650ccc22a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_OLMo-7B-hf/1762652579.984753", - "retrieved_timestamp": "1762652579.984753", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/OLMo-7B-hf", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMo-7B-hf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2719273749207658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32791316587362274 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11727061170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "OlmoForCausalLM", - "params_billions": 6.888 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0125-Instruct/af176c4c-b06f-44ac-bcba-1331d9148958.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0125-Instruct/af176c4c-b06f-44ac-bcba-1331d9148958.json deleted file mode 100644 index ac6d05767eb643a4ddb192ef0f39533ed757874d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0125-Instruct/af176c4c-b06f-44ac-bcba-1331d9148958.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_OLMoE-1B-7B-0125-Instruct/1762652579.984983", - "retrieved_timestamp": "1762652579.984983", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/OLMoE-1B-7B-0125-Instruct", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMoE-1B-7B-0125-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6757436934001781 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38245348916008676 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3635833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19148936170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "OlmoeForCausalLM", - "params_billions": 6.919 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0924-Instruct/a580b690-0829-43b9-8d52-6dd226208901.json 
b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0924-Instruct/a580b690-0829-43b9-8d52-6dd226208901.json deleted file mode 100644 index 5545b69e092f9ca39c90eb203cf37300decf471f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0924-Instruct/a580b690-0829-43b9-8d52-6dd226208901.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_OLMoE-1B-7B-0924-Instruct/1762652579.98542", - "retrieved_timestamp": "1762652579.98542", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/OLMoE-1B-7B-0924-Instruct", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMoE-1B-7B-0924-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4667415790103592 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3901610626816106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3848229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18758311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "OlmoeForCausalLM", - "params_billions": 6.919 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0924/af1bb542-77cb-47e2-89f1-16cc91e89452.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0924/af1bb542-77cb-47e2-89f1-16cc91e89452.json deleted file mode 100644 index c6ede33c39b1f7ba0fb14e24cc54167a2616eae8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0924/af1bb542-77cb-47e2-89f1-16cc91e89452.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_OLMoE-1B-7B-0924/1762652579.985209", - "retrieved_timestamp": "1762652579.9852102", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/OLMoE-1B-7B-0924", - "developer": "allenai", - "inference_platform": "unknown", - "id": "allenai/OLMoE-1B-7B-0924" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21847143357402804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3393437931177341 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34879166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1739527925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "OlmoeForCausalLM", - "params_billions": 6.919 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Chocolatine-24B/9d3d89f9-e792-4b33-91d1-41f84ca1cc68.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Chocolatine-24B/9d3d89f9-e792-4b33-91d1-41f84ca1cc68.json deleted file mode 100644 index 75060bc0ecdbda92bd446579f94a9c64669e9909..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Chocolatine-24B/9d3d89f9-e792-4b33-91d1-41f84ca1cc68.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Chocolatine-24B/1762652579.9856288", - "retrieved_timestamp": "1762652579.98563", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Chocolatine-24B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Chocolatine-24B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { 
- "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19581488229010136 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6191260063262436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43232291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4566156914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 24.184 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp1-7B/340dfc7b-9af0-4545-9d7b-6950ea69bd57.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp1-7B/340dfc7b-9af0-4545-9d7b-6950ea69bd57.json deleted file mode 100644 index adc973a8ed443cb81f444d7ffb948602a0c17932..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp1-7B/340dfc7b-9af0-4545-9d7b-6950ea69bd57.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp1-7B/1762652579.988248", - "retrieved_timestamp": "1762652579.988249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/HomerSlerp1-7B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/HomerSlerp1-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46212050692163464 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.551818027489446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match 
on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2719033232628399 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43585416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4503823138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp2-7B/ea9cc238-75d0-45e7-b10e-e214516ca36e.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp2-7B/ea9cc238-75d0-45e7-b10e-e214516ca36e.json deleted file mode 100644 index 2ff386ed6ac2775beef895830a5cd894f3b74d57..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp2-7B/ea9cc238-75d0-45e7-b10e-e214516ca36e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp2-7B/1762652579.988459", - "retrieved_timestamp": "1762652579.98846", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/HomerSlerp2-7B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/HomerSlerp2-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44868172005833407 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5648943315947 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43557291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45146276595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp3-7B/a8a69b0c-02c9-437d-975d-69f1ddc6959a.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp3-7B/a8a69b0c-02c9-437d-975d-69f1ddc6959a.json deleted file mode 100644 index 50e886a4f316469757e6acb1890a0a54afd8501c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp3-7B/a8a69b0c-02c9-437d-975d-69f1ddc6959a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp3-7B/1762652579.988729", - "retrieved_timestamp": "1762652579.9887302", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/HomerSlerp3-7B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/HomerSlerp3-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4362668829815999 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5598063466560873 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3021148036253776 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44617708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45345744680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp4-7B/988da677-c00d-4e7c-847e-6ca553e0124b.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp4-7B/988da677-c00d-4e7c-847e-6ca553e0124b.json deleted file mode 100644 index fd6785d19ed3a6c0dbabfb0c0adc9cb5cb858cf5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp4-7B/988da677-c00d-4e7c-847e-6ca553e0124b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp4-7B/1762652579.988936", - "retrieved_timestamp": "1762652579.988937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/HomerSlerp4-7B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/HomerSlerp4-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43741605606457534 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5570767234678723 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3270392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44084375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44722406914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_LimyQstar-7B-slerp/ac45b8ec-454f-4a91-9418-a3dc70535119.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_LimyQstar-7B-slerp/ac45b8ec-454f-4a91-9418-a3dc70535119.json deleted file mode 100644 index 9e97b0bd249b7ddf456363f3f13695ccbb4d1b3d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_LimyQstar-7B-slerp/ac45b8ec-454f-4a91-9418-a3dc70535119.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/allknowingroger_LimyQstar-7B-slerp/1762652579.98914", - "retrieved_timestamp": "1762652579.989141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/LimyQstar-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/LimyQstar-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34911368502240725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5023559424245442 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4146458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3103390957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Marco-01-slerp1-7B/1b8abf32-6b66-4e9b-9b82-e1978d07a483.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Marco-01-slerp1-7B/1b8abf32-6b66-4e9b-9b82-e1978d07a483.json deleted file mode 100644 index d5a360d486905bd4dd42cf8aa0552c795883bee8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Marco-01-slerp1-7B/1b8abf32-6b66-4e9b-9b82-e1978d07a483.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Marco-01-slerp1-7B/1762652579.989768", - "retrieved_timestamp": "1762652579.98977", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Marco-01-slerp1-7B", - "developer": 
"allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Marco-01-slerp1-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46811571075856506 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5540943469864194 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4451875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44830452127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Meme-7B-slerp/8eaa7d3f-0217-4ed3-9367-9e0f9c0926fe.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Meme-7B-slerp/8eaa7d3f-0217-4ed3-9367-9e0f9c0926fe.json deleted file mode 100644 index 87b5f1c807864a14df74c90bab03bbfc003ee9b3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Meme-7B-slerp/8eaa7d3f-0217-4ed3-9367-9e0f9c0926fe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Meme-7B-slerp/1762652579.9900281", - "retrieved_timestamp": "1762652579.990029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Meme-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Meme-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5163754393897082 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.4660944195552204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4223020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.281000664893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ministral-8B-slerp/effba194-3b2a-4847-9708-e3cb62a7c964.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ministral-8B-slerp/effba194-3b2a-4847-9708-e3cb62a7c964.json deleted file mode 100644 index e0c5b7973ea6fe25881d0edfa6615809240ca0ba..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ministral-8B-slerp/effba194-3b2a-4847-9708-e3cb62a7c964.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ministral-8B-slerp/1762652579.990243", - "retrieved_timestamp": "1762652579.9902442", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Ministral-8B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ministral-8B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19608970863974257 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4686018544963986 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42853125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3119182180851064 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MixTAO-19B-pass/275fb96e-4779-479b-937b-f5db6aa530ea.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MixTAO-19B-pass/275fb96e-4779-479b-937b-f5db6aa530ea.json deleted file mode 100644 index 66843533d540d1290cd9bb0500f31926ee2b4a07..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MixTAO-19B-pass/275fb96e-4779-479b-937b-f5db6aa530ea.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_MixTAO-19B-pass/1762652579.991234", - "retrieved_timestamp": "1762652579.991235", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/MixTAO-19B-pass", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MixTAO-19B-pass" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3814368098866563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5128248798224987 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47827083333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.31050531914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 19.188 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MixTaoTruthful-13B-slerp/003c05a1-abb7-41d3-a264-efc6923b64ef.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MixTaoTruthful-13B-slerp/003c05a1-abb7-41d3-a264-efc6923b64ef.json deleted file mode 100644 index c195b1ec8b99826f7e68ce972a9ab0b84b4b33da..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MixTaoTruthful-13B-slerp/003c05a1-abb7-41d3-a264-efc6923b64ef.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_MixTaoTruthful-13B-slerp/1762652579.991453", - "retrieved_timestamp": "1762652579.991454", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/MixTaoTruthful-13B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MixTaoTruthful-13B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41388515804731446 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5207335343585151 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42924999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3100066489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiCalm-7B-slerp/36176ae9-e852-4604-9961-b7f02e4c3e55.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiCalm-7B-slerp/36176ae9-e852-4604-9961-b7f02e4c3e55.json deleted file mode 100644 index 
7be0632d917d103debdbc12eeda2ef187c37d485..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiCalm-7B-slerp/36176ae9-e852-4604-9961-b7f02e4c3e55.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiCalm-7B-slerp/1762652579.991671", - "retrieved_timestamp": "1762652579.991672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/MultiCalm-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiCalm-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3926526061960044 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5121891599770304 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43194791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3032746010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash-12B-slerp/ed27cd90-e73f-4432-aed9-dd36f29cba1a.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash-12B-slerp/ed27cd90-e73f-4432-aed9-dd36f29cba1a.json deleted file mode 100644 index 4b1e940114aff2a59ea0038fa09d7011a94dc3d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash-12B-slerp/ed27cd90-e73f-4432-aed9-dd36f29cba1a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash-12B-slerp/1762652579.991891", - "retrieved_timestamp": "1762652579.9918919", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/MultiMash-12B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash-12B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39744876926554873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141827379810838 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44379166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3067652925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash10-13B-slerp/7e4b1f44-73f9-4a6d-9d66-91c60e69e3d2.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash10-13B-slerp/7e4b1f44-73f9-4a6d-9d66-91c60e69e3d2.json deleted file mode 100644 index e05563b6b1954ae33471e02a3c4d6b861c166875..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash10-13B-slerp/7e4b1f44-73f9-4a6d-9d66-91c60e69e3d2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash10-13B-slerp/1762652579.992115", - "retrieved_timestamp": "1762652579.992116", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/MultiMash10-13B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash10-13B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41628323958208663 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5186335995744094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43179166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3116688829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash11-13B-slerp/1b3bfb2a-8290-4af0-bdac-24397a5b6f86.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash11-13B-slerp/1b3bfb2a-8290-4af0-bdac-24397a5b6f86.json deleted file mode 100644 index 4ede973cae22eb8e8277a149c0af949c569f7686..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash11-13B-slerp/1b3bfb2a-8290-4af0-bdac-24397a5b6f86.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash11-13B-slerp/1762652579.992343", - "retrieved_timestamp": "1762652579.9923441", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/MultiMash11-13B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash11-13B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4251009543566625 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193864686484946 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { 
- "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30851063829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash2-12B-slerp/af52a422-e959-4662-98e8-c94fa83bee3e.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash2-12B-slerp/af52a422-e959-4662-98e8-c94fa83bee3e.json deleted file mode 100644 index 2a5d0962bb8f911cfdd279f27cb76fe17b22da91..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash2-12B-slerp/af52a422-e959-4662-98e8-c94fa83bee3e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash2-12B-slerp/1762652579.992556", - "retrieved_timestamp": "1762652579.992556", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/MultiMash2-12B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash2-12B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42607503645881817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5133973498532299 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4228020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3042719414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash5-12B-slerp/df7621bc-5af2-45c5-b8e4-ebc158dad966.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash5-12B-slerp/df7621bc-5af2-45c5-b8e4-ebc158dad966.json deleted file mode 100644 index 28ce8261d791abf981e0ba02c16267b12e81df91..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash5-12B-slerp/df7621bc-5af2-45c5-b8e4-ebc158dad966.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash5-12B-slerp/1762652579.992772", - "retrieved_timestamp": "1762652579.992772", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/MultiMash5-12B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash5-12B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41415998439695567 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5144534995858502 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4202916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30277593085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash6-12B-slerp/195b1c31-c766-479c-a445-39a6150404fc.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash6-12B-slerp/195b1c31-c766-479c-a445-39a6150404fc.json deleted file mode 100644 index 064be03c1e720f03a3bcb05909a6d133507baa66..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash6-12B-slerp/195b1c31-c766-479c-a445-39a6150404fc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash6-12B-slerp/1762652579.992992", - "retrieved_timestamp": "1762652579.992993", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/MultiMash6-12B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash6-12B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43004672047943904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5195916915718951 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4305833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30909242021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash7-12B-slerp/141507b5-67df-4c38-9eeb-b9d3cf98b08f.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash7-12B-slerp/141507b5-67df-4c38-9eeb-b9d3cf98b08f.json deleted file mode 100644 index f83395c6127cd2a60be2cb3939ad76d2825b6180..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash7-12B-slerp/141507b5-67df-4c38-9eeb-b9d3cf98b08f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash7-12B-slerp/1762652579.993205", - "retrieved_timestamp": "1762652579.993206", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/MultiMash7-12B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash7-12B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42127887338927383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5111135397195524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42794791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3029421542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash8-13B-slerp/54a836bc-8048-4c2b-a65a-937acc2fa414.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash8-13B-slerp/54a836bc-8048-4c2b-a65a-937acc2fa414.json deleted file mode 100644 index a4512a92e89d8b79dd02bcdd13a20e7997adf564..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash8-13B-slerp/54a836bc-8048-4c2b-a65a-937acc2fa414.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash8-13B-slerp/1762652579.9938078", - "retrieved_timestamp": "1762652579.99381", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/MultiMash8-13B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash8-13B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4320702402957486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5178483059643324 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4423958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31258311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash9-13B-slerp/6a0f5973-6377-4707-a0e3-414ca1f22b32.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash9-13B-slerp/6a0f5973-6377-4707-a0e3-414ca1f22b32.json deleted file mode 100644 index 73796c0137c5421befee6ce520161ec7c4ee07f2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash9-13B-slerp/6a0f5973-6377-4707-a0e3-414ca1f22b32.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash9-13B-slerp/1762652579.994061", - "retrieved_timestamp": "1762652579.994061", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/MultiMash9-13B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMash9-13B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187810564856802 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193579939678727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4398229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3100066489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMerge-7B-slerp/f0aae363-f838-48c8-bf9e-b8e9f0e84a24.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMerge-7B-slerp/f0aae363-f838-48c8-bf9e-b8e9f0e84a24.json deleted file mode 100644 index 85a7faca97879786ea26cc99a3b9c245c35e286b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMerge-7B-slerp/f0aae363-f838-48c8-bf9e-b8e9f0e84a24.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMerge-7B-slerp/1762652579.994297", - "retrieved_timestamp": "1762652579.994299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/MultiMerge-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiMerge-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3947758613811354 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5140224933103638 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42797916666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036901595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Multimash3-12B-slerp/80aa0629-7ea1-4f69-b302-c0502abcbbab.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Multimash3-12B-slerp/80aa0629-7ea1-4f69-b302-c0502abcbbab.json deleted file mode 100644 index 4f3af69c40e382115a8815cf44529b0bc8ded335..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Multimash3-12B-slerp/80aa0629-7ea1-4f69-b302-c0502abcbbab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Multimash3-12B-slerp/1762652579.994557", - "retrieved_timestamp": "1762652579.994557", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Multimash3-12B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Multimash3-12B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44371046600796993 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5176624678276028 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3067652925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Multimerge-19B-pass/818e21b8-da78-4649-a71a-ba71c89d1fe7.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Multimerge-19B-pass/818e21b8-da78-4649-a71a-ba71c89d1fe7.json deleted file mode 100644 index fc024ad33f064a89a18788fa510b9296bdc118c9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Multimerge-19B-pass/818e21b8-da78-4649-a71a-ba71c89d1fe7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Multimerge-19B-pass/1762652579.9948218", - "retrieved_timestamp": "1762652579.994823", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Multimerge-19B-pass", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Multimerge-19B-pass" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17730510600761534 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2891778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3429583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11685505319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 19.188 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiverseEx26-7B-slerp/30b74d3f-7247-4c93-9c94-dc8beba14b70.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiverseEx26-7B-slerp/30b74d3f-7247-4c93-9c94-dc8beba14b70.json deleted file mode 100644 index d12bd02368cff20b40e3b473947d0a6b02051753..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiverseEx26-7B-slerp/30b74d3f-7247-4c93-9c94-dc8beba14b70.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_MultiverseEx26-7B-slerp/1762652579.995038", - "retrieved_timestamp": "1762652579.995039", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/MultiverseEx26-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/MultiverseEx26-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3938516469633905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5133591871690678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4293125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3035239361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_NeuralWestSeverus-7B-slerp/fc6d4451-0a9c-4d53-8d22-179ff7059d61.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_NeuralWestSeverus-7B-slerp/fc6d4451-0a9c-4d53-8d22-179ff7059d61.json deleted file mode 100644 index d9511a3b9fadd5fff14699809db9161160d15108..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_NeuralWestSeverus-7B-slerp/fc6d4451-0a9c-4d53-8d22-179ff7059d61.json +++ /dev/null 
@@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_NeuralWestSeverus-7B-slerp/1762652579.995253", - "retrieved_timestamp": "1762652579.995254", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/NeuralWestSeverus-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/NeuralWestSeverus-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41356046401326263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244283854305991 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45287499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3137466755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Neuralcoven-7B-slerp/ba46f82b-2129-43db-ae21-09e6576dc4e6.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Neuralcoven-7B-slerp/ba46f82b-2129-43db-ae21-09e6576dc4e6.json deleted file mode 100644 index 3267ae27582d5de8ac48faf122230d20b2aad2c9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Neuralcoven-7B-slerp/ba46f82b-2129-43db-ae21-09e6576dc4e6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Neuralcoven-7B-slerp/1762652579.995681", - "retrieved_timestamp": "1762652579.995682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "allknowingroger/Neuralcoven-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Neuralcoven-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3858584112377381 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.530287217712165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.429 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3293716755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Neuralmultiverse-7B-slerp/b98b76ea-b068-46ec-b929-4ca1037eaf99.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Neuralmultiverse-7B-slerp/b98b76ea-b068-46ec-b929-4ca1037eaf99.json deleted file mode 100644 index 732b7e8c1f336cd162988d63c6325d78c1a77ff7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Neuralmultiverse-7B-slerp/b98b76ea-b068-46ec-b929-4ca1037eaf99.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Neuralmultiverse-7B-slerp/1762652579.995954", - "retrieved_timestamp": "1762652579.995955", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Neuralmultiverse-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Neuralmultiverse-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3769154731667531 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5165722210470375 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42804166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30418882978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3della5-14B/d5a47313-b2f5-4833-9539-b8f56e4a5fda.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3della5-14B/d5a47313-b2f5-4833-9539-b8f56e4a5fda.json deleted file mode 100644 index 4b3cc7c695a3b97dc4afd03f42052f90e21d2ab9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3della5-14B/d5a47313-b2f5-4833-9539-b8f56e4a5fda.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3della5-14B/1762652579.9961941", - "retrieved_timestamp": "1762652579.996195", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Ph3della5-14B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3della5-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47985567183960776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6331746353794991 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17673716012084592 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4386145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4787234042553192 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge-14B/95228f47-8fb1-443c-8ad4-0021504e34e0.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge-14B/95228f47-8fb1-443c-8ad4-0021504e34e0.json deleted file mode 100644 index 422c3d87a755163cc9b66b5e722b3e9eadb0e4d9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge-14B/95228f47-8fb1-443c-8ad4-0021504e34e0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3merge-14B/1762652579.996419", - "retrieved_timestamp": "1762652579.9964201", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Ph3merge-14B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3merge-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27012881376968667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.638087568868341 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4334375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4611037234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.619 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge2-14B/b5790fec-6c12-42a3-853c-488658bf949d.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge2-14B/b5790fec-6c12-42a3-853c-488658bf949d.json deleted file mode 100644 index 279c00686f91f438e82c12c3eacf13e5cb0d5b88..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge2-14B/b5790fec-6c12-42a3-853c-488658bf949d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3merge2-14B/1762652579.996639", - "retrieved_timestamp": "1762652579.99664", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Ph3merge2-14B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3merge2-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17061064641817045 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3606937444321621 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1722905585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.619 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge3-14B/e5d9bded-a8e4-4133-84b9-6eac517a4226.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge3-14B/e5d9bded-a8e4-4133-84b9-6eac517a4226.json deleted file mode 100644 index 
40127acc295b83bfb266ae82753be8659ce5a69b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge3-14B/e5d9bded-a8e4-4133-84b9-6eac517a4226.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3merge3-14B/1762652579.99685", - "retrieved_timestamp": "1762652579.996851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Ph3merge3-14B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3merge3-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1645157072124186 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3597431731140411 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40819791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16472739361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.619 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task1-14B/718ef6de-5926-4a4c-bade-9a162ce8e730.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task1-14B/718ef6de-5926-4a4c-bade-9a162ce8e730.json deleted file mode 100644 index 1b52cda4647ece7070bd4b4d0efcb489818fe8a9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task1-14B/718ef6de-5926-4a4c-bade-9a162ce8e730.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3task1-14B/1762652579.997059", - "retrieved_timestamp": "1762652579.99706", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Ph3task1-14B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3task1-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46946435457918323 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.63178060736657 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45077083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4734042553191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task2-14B/5d818d86-2caf-4b29-9c15-8fa27217de22.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task2-14B/5d818d86-2caf-4b29-9c15-8fa27217de22.json deleted file mode 100644 index 719f0a115af0487d4b118c1fbef03432fa471163..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task2-14B/5d818d86-2caf-4b29-9c15-8fa27217de22.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3task2-14B/1762652579.99728", - "retrieved_timestamp": "1762652579.997281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Ph3task2-14B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3task2-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4713127834146731 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6098412220695854 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4535 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44597739361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task3-14B/a935c0d1-6623-45c6-a100-96c8b5a3a2fb.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task3-14B/a935c0d1-6623-45c6-a100-96c8b5a3a2fb.json deleted file mode 100644 index 51e1df9078b7574d4d7789702bc244cab87d37f9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task3-14B/a935c0d1-6623-45c6-a100-96c8b5a3a2fb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3task3-14B/1762652579.997498", - "retrieved_timestamp": "1762652579.997499", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Ph3task3-14B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3task3-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962421929369628 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6297915743094921 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44255208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47706117021276595 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3unsloth-3B-slerp/0a9be33a-792e-413c-b60d-3e97a060fa78.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3unsloth-3B-slerp/0a9be33a-792e-413c-b60d-3e97a060fa78.json deleted file mode 100644 index 405c47e04fdbed0aabbfd91fbd46aa496bdb1d5e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3unsloth-3B-slerp/0a9be33a-792e-413c-b60d-3e97a060fa78.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3unsloth-3B-slerp/1762652579.99772", - "retrieved_timestamp": "1762652579.99772", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Ph3unsloth-3B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Ph3unsloth-3B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18944511673470835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5468077356147099 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45278124999999997 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3700964095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Quen2-65B/4bc3f55b-0638-4fc2-b1d9-04780707acef.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Quen2-65B/4bc3f55b-0638-4fc2-b1d9-04780707acef.json deleted file mode 100644 index 75a207e17116750d26a721993ee9eb092a86862b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Quen2-65B/4bc3f55b-0638-4fc2-b1d9-04780707acef.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Quen2-65B/1762652579.9981499", - "retrieved_timestamp": "1762652579.9981499", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Quen2-65B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Quen2-65B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17578137120617737 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27565161872324456 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32085416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11136968085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 63.923 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_RogerMerge-7B-slerp/50289a8b-4522-4dca-b6dc-aa42193deefa.json 
b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_RogerMerge-7B-slerp/50289a8b-4522-4dca-b6dc-aa42193deefa.json deleted file mode 100644 index 54519ce30ad465d976b1ef3a4d86bac8383fa431..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_RogerMerge-7B-slerp/50289a8b-4522-4dca-b6dc-aa42193deefa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_RogerMerge-7B-slerp/1762652580.002474", - "retrieved_timestamp": "1762652580.002475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/RogerMerge-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/RogerMerge-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39330199426410817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5160176493085935 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43197916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30302526595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Strangecoven-7B-slerp/f125c8d1-57f3-4b79-ace4-2104b008a507.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Strangecoven-7B-slerp/f125c8d1-57f3-4b79-ace4-2104b008a507.json deleted file mode 100644 index 3b14accda98ba1b21f530f5cb507a3c39dff0d5d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Strangecoven-7B-slerp/f125c8d1-57f3-4b79-ace4-2104b008a507.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Strangecoven-7B-slerp/1762652580.002888", - 
"retrieved_timestamp": "1762652580.002889", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Strangecoven-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Strangecoven-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37464261492839 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5368022290282338 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4198854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33643617021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Weirdslerp2-25B/61e517f7-e2db-48bd-8f4e-f62b5859b62e.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Weirdslerp2-25B/61e517f7-e2db-48bd-8f4e-f62b5859b62e.json deleted file mode 100644 index 439f5d6a4dead5f8045d3c5f3b7114db20673605..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Weirdslerp2-25B/61e517f7-e2db-48bd-8f4e-f62b5859b62e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Weirdslerp2-25B/1762652580.00309", - "retrieved_timestamp": "1762652580.0030909", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Weirdslerp2-25B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": 
"allknowingroger/Weirdslerp2-25B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1754068094877148 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2873695911207614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 25.204 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_WestlakeMaziyar-7B-slerp/2db948db-a9e5-41cf-9567-2f9198d80900.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_WestlakeMaziyar-7B-slerp/2db948db-a9e5-41cf-9567-2f9198d80900.json deleted file mode 100644 index 2d9780ebf41d217f8e830f0c2f4fa2553bd94ccf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_WestlakeMaziyar-7B-slerp/2db948db-a9e5-41cf-9567-2f9198d80900.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_WestlakeMaziyar-7B-slerp/1762652580.003291", - "retrieved_timestamp": "1762652580.0032918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/WestlakeMaziyar-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/WestlakeMaziyar-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48377748817581795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.5245479952765804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44738541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3077626329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_YamMaths-7B-slerp/52ab1e94-4e6f-4876-932b-a45a033dec1b.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_YamMaths-7B-slerp/52ab1e94-4e6f-4876-932b-a45a033dec1b.json deleted file mode 100644 index 73dff62b9b470f9cd03050b92eb7839109c52539..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_YamMaths-7B-slerp/52ab1e94-4e6f-4876-932b-a45a033dec1b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_YamMaths-7B-slerp/1762652580.003488", - "retrieved_timestamp": "1762652580.003489", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/YamMaths-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/YamMaths-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4148093724650594 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5155845857281723 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43836458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3130817819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yi-1.5-34B/98455065-72e1-4dad-bce1-1c3ceddf5433.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yi-1.5-34B/98455065-72e1-4dad-bce1-1c3ceddf5433.json deleted file mode 100644 index fee1804ebf001a3845a7ae9177d6742ce4d1e27d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yi-1.5-34B/98455065-72e1-4dad-bce1-1c3ceddf5433.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Yi-1.5-34B/1762652580.0036852", - "retrieved_timestamp": "1762652580.003686", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Yi-1.5-34B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Yi-1.5-34B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16391618682872555 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28272506287695653 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38565625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10954122340425532 - } - } - ], - "additional_details": 
{ - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yi-blossom-40B/b35eaca2-0f77-4171-bbcf-23a191b055f2.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yi-blossom-40B/b35eaca2-0f77-4171-bbcf-23a191b055f2.json deleted file mode 100644 index c2d5fae60697662dde37a21171ef63c2e19ecb38..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yi-blossom-40B/b35eaca2-0f77-4171-bbcf-23a191b055f2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Yi-blossom-40B/1762652580.004046", - "retrieved_timestamp": "1762652580.0040479", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Yi-blossom-40B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Yi-blossom-40B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20088587170928693 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32150442258143547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10804521276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 18.769 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yibuddy-35B/dc2688b9-9dff-4a2e-b3d8-3bdc82634d20.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yibuddy-35B/dc2688b9-9dff-4a2e-b3d8-3bdc82634d20.json deleted file mode 100644 index 4ce44f0d067b06a8c7fd4262dd1d4a43e1c0cdc3..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yibuddy-35B/dc2688b9-9dff-4a2e-b3d8-3bdc82634d20.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Yibuddy-35B/1762652580.004411", - "retrieved_timestamp": "1762652580.004412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Yibuddy-35B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Yibuddy-35B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4234774841864032 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5916185369526096 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15709969788519637 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45045833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44888630319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yislerp-34B/723d2f60-f12a-4abb-9061-807fd38e7d51.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yislerp-34B/723d2f60-f12a-4abb-9061-807fd38e7d51.json deleted file mode 100644 index 0d29f7e54653e498212ee2b1a80f130899e082ec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yislerp-34B/723d2f60-f12a-4abb-9061-807fd38e7d51.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Yislerp-34B/1762652580.0049741", - "retrieved_timestamp": "1762652580.004975", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Yislerp-34B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Yislerp-34B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3691970637907419 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6158722731484186 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21601208459214502 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.456625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4751496010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yislerp2-34B/ce55aca1-80bd-4711-ad05-d812d206bd14.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yislerp2-34B/ce55aca1-80bd-4711-ad05-d812d206bd14.json deleted file mode 100644 index d1528c2b3cd2c58e3d040316460df4ae9fd5d136..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yislerp2-34B/ce55aca1-80bd-4711-ad05-d812d206bd14.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Yislerp2-34B/1762652580.005196", - "retrieved_timestamp": "1762652580.005197", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Yislerp2-34B", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Yislerp2-34B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39994658616914236 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6245771970170245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640939597315436 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45296875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.472406914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yunconglong-13B-slerp/8ae47af1-5ae6-4cb9-ac94-8d70fda5126d.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yunconglong-13B-slerp/8ae47af1-5ae6-4cb9-ac94-8d70fda5126d.json deleted file mode 100644 index 57becbd06a4ffca27d7f9c1eed906710f611c045..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yunconglong-13B-slerp/8ae47af1-5ae6-4cb9-ac94-8d70fda5126d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Yunconglong-13B-slerp/1762652580.005601", - "retrieved_timestamp": "1762652580.005603", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Yunconglong-13B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/Yunconglong-13B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42417673993891764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5165807158493828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4160729166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30360704787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_limyClown-7B-slerp/420f8334-c420-4b8f-8853-fea8f4f5ac6d.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_limyClown-7B-slerp/420f8334-c420-4b8f-8853-fea8f4f5ac6d.json deleted file mode 100644 index 07cd165f20c9e3417edd53be2a6c2660e6b9bbd7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_limyClown-7B-slerp/420f8334-c420-4b8f-8853-fea8f4f5ac6d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_limyClown-7B-slerp/1762652580.005876", - "retrieved_timestamp": "1762652580.005877", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/limyClown-7B-slerp", - "developer": "allknowingroger", - "inference_platform": "unknown", - "id": "allknowingroger/limyClown-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4017451473202215 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5147517317055973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4293125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30377327127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_L3.1-8b-RP-Ink/cb8c45ae-1be6-4ab0-9317-cfbfc8850dc4.json b/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_L3.1-8b-RP-Ink/cb8c45ae-1be6-4ab0-9317-cfbfc8850dc4.json deleted file mode 100644 index 3fa9328fa1d66d9f4e06bbd92af4a4bb402139c6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_L3.1-8b-RP-Ink/cb8c45ae-1be6-4ab0-9317-cfbfc8850dc4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allura-org_L3.1-8b-RP-Ink/1762652580.006678", - "retrieved_timestamp": "1762652580.006679", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allura-org/L3.1-8b-RP-Ink", - "developer": "allura-org", - "inference_platform": "unknown", - "id": "allura-org/L3.1-8b-RP-Ink" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7811063533646281 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48284724308518095 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3608229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3427526595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MN-12b-RP-Ink/3dc6cdf9-e75d-4f9f-9b91-9592e70566f8.json b/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MN-12b-RP-Ink/3dc6cdf9-e75d-4f9f-9b91-9592e70566f8.json deleted file mode 100644 
index 7378c1ed7638f472eaff0b4065f0e81286aa8719..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MN-12b-RP-Ink/3dc6cdf9-e75d-4f9f-9b91-9592e70566f8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allura-org_MN-12b-RP-Ink/1762652580.006974", - "retrieved_timestamp": "1762652580.006975", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allura-org/MN-12b-RP-Ink", - "developer": "allura-org", - "inference_platform": "unknown", - "id": "allura-org/MN-12b-RP-Ink" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7186332265056716 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4833826588550261 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38184375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3513962765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MS-Meadowlark-22B/7ea2cf22-114f-449c-a9cf-c4f379646cd3.json b/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MS-Meadowlark-22B/7ea2cf22-114f-449c-a9cf-c4f379646cd3.json deleted file mode 100644 index 428d9e6b5653df5d81003b668a9dc255a84f73b7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MS-Meadowlark-22B/7ea2cf22-114f-449c-a9cf-c4f379646cd3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allura-org_MS-Meadowlark-22B/1762652580.007196", - "retrieved_timestamp": "1762652580.007197", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allura-org/MS-Meadowlark-22B", - "developer": "allura-org", - "inference_platform": "unknown", - "id": "allura-org/MS-Meadowlark-22B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.669698621878837 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5162576933217772 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18353474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38231382978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MoE-Girl-1BA-7BT/5b3176a0-7ded-409a-bc54-70e0ecf9b325.json b/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MoE-Girl-1BA-7BT/5b3176a0-7ded-409a-bc54-70e0ecf9b325.json deleted file mode 100644 index 1f85340e533310f48a2d51427049f75bd87079c0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MoE-Girl-1BA-7BT/5b3176a0-7ded-409a-bc54-70e0ecf9b325.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allura-org_MoE-Girl-1BA-7BT/1762652580.0080209", - "retrieved_timestamp": "1762652580.008022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allura-org/MoE-Girl-1BA-7BT", - "developer": "allura-org", - "inference_platform": "unknown", - "id": "allura-org/MoE-Girl-1BA-7BT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27050337548814923 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3139175363262408 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34355208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12175864361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "OlmoeForCausalLM", - "params_billions": 6.919 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_TQ2.5-14B-Aletheia-v1/b46bef60-b37b-4510-a92a-fb4c0cabb357.json b/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_TQ2.5-14B-Aletheia-v1/b46bef60-b37b-4510-a92a-fb4c0cabb357.json deleted file mode 100644 index 128adaba22204cc173a6c58d4dfdd6fc61277159..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_TQ2.5-14B-Aletheia-v1/b46bef60-b37b-4510-a92a-fb4c0cabb357.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allura-org_TQ2.5-14B-Aletheia-v1/1762652580.008265", - "retrieved_timestamp": "1762652580.008276", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allura-org/TQ2.5-14B-Aletheia-v1", - "developer": "allura-org", - "inference_platform": "unknown", - "id": "allura-org/TQ2.5-14B-Aletheia-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7530297388706411 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6585074769185942 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33987915407854985 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44515625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241023936170213 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_TQ2.5-14B-Neon-v1/68bdab24-8324-4190-abd2-ad3ad5a7a853.json b/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_TQ2.5-14B-Neon-v1/68bdab24-8324-4190-abd2-ad3ad5a7a853.json deleted file mode 100644 index e9c11a66c0e195470fd46a27a8e29d1021175556..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_TQ2.5-14B-Neon-v1/68bdab24-8324-4190-abd2-ad3ad5a7a853.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allura-org_TQ2.5-14B-Neon-v1/1762652580.0085812", - "retrieved_timestamp": "1762652580.0085819", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allura-org/TQ2.5-14B-Neon-v1", - "developer": "allura-org", - "inference_platform": "unknown", - "id": "allura-org/TQ2.5-14B-Neon-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6754189993661264 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.655304131044165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36027190332326287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.461 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5252659574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_Teleut-7b/85ceb275-787a-4dbc-981a-513fd16606ea.json b/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_Teleut-7b/85ceb275-787a-4dbc-981a-513fd16606ea.json deleted file mode 100644 index 9a3b9bf6dd252bfb489e8a81ccc6b7b7284e9ba0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_Teleut-7b/85ceb275-787a-4dbc-981a-513fd16606ea.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allura-org_Teleut-7b/1762652580.008814", - "retrieved_timestamp": "1762652580.008814", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allura-org/Teleut-7b", - "developer": "allura-org", - "inference_platform": "unknown", - "id": "allura-org/Teleut-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6378752820294595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141277814496585 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24093655589123866 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4640416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4130651595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/aloobun/aloobun_d-SmolLM2-360M/1ad7b4c4-8074-482e-9010-ce1552325e15.json b/leaderboard_data/HFOpenLLMv2/aloobun/aloobun_d-SmolLM2-360M/1ad7b4c4-8074-482e-9010-ce1552325e15.json deleted file mode 100644 index f9fb6355bf70b8ceeadd0c7c03bbd1bc3922261c..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/aloobun/aloobun_d-SmolLM2-360M/1ad7b4c4-8074-482e-9010-ce1552325e15.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/aloobun_d-SmolLM2-360M/1762652580.0092921", - "retrieved_timestamp": "1762652580.009293", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "aloobun/d-SmolLM2-360M", - "developer": "aloobun", - "inference_platform": "unknown", - "id": "aloobun/d-SmolLM2-360M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20970358648386284 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3195784405636826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3980625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11693816489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alpindale/alpindale_WizardLM-2-8x22B/c2899c4e-5bc9-4b0b-8938-b9848b86fe37.json b/leaderboard_data/HFOpenLLMv2/alpindale/alpindale_WizardLM-2-8x22B/c2899c4e-5bc9-4b0b-8938-b9848b86fe37.json deleted file mode 100644 index c6bdd758d3a36e402e887af6d0f8b6426f83d8dd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alpindale/alpindale_WizardLM-2-8x22B/c2899c4e-5bc9-4b0b-8938-b9848b86fe37.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/alpindale_WizardLM-2-8x22B/1762652580.009551", - "retrieved_timestamp": "1762652580.0095518", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, 
- "model_info": { - "name": "alpindale/WizardLM-2-8x22B", - "developer": "alpindale", - "inference_platform": "unknown", - "id": "alpindale/WizardLM-2-8x22B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5272166739805937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6377307938917097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45960771276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 140.621 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/alpindale/alpindale_magnum-72b-v1/186687f8-ed25-44c9-b634-36db1c734844.json b/leaderboard_data/HFOpenLLMv2/alpindale/alpindale_magnum-72b-v1/186687f8-ed25-44c9-b634-36db1c734844.json deleted file mode 100644 index 644786870687d66cbbe731cb4e1e1a0280f88bdf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/alpindale/alpindale_magnum-72b-v1/186687f8-ed25-44c9-b634-36db1c734844.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/alpindale_magnum-72b-v1/1762652580.0098088", - "retrieved_timestamp": "1762652580.00981", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "alpindale/magnum-72b-v1", - "developer": "alpindale", - "inference_platform": "unknown", - "id": "alpindale/magnum-72b-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7606484128778308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.6982215794373214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4489375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5467918882978723 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/altomek/altomek_YiSM-34B-0rn/a9c75810-f51d-4fd3-8c96-6afdbc0f278c.json b/leaderboard_data/HFOpenLLMv2/altomek/altomek_YiSM-34B-0rn/a9c75810-f51d-4fd3-8c96-6afdbc0f278c.json deleted file mode 100644 index 102b9570e46a4e4c2baf441b262f6565aa1a4149..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/altomek/altomek_YiSM-34B-0rn/a9c75810-f51d-4fd3-8c96-6afdbc0f278c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/altomek_YiSM-34B-0rn/1762652580.010027", - "retrieved_timestamp": "1762652580.0100281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "altomek/YiSM-34B-0rn", - "developer": "altomek", - "inference_platform": "unknown", - "id": "altomek/YiSM-34B-0rn" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.428373382624769 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6140009573868866 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2280966767371601 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4695811170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v1-72b/6d98f0fa-25c9-409b-b82e-b3c128bf47b6.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v1-72b/6d98f0fa-25c9-409b-b82e-b3c128bf47b6.json deleted file mode 100644 index bb64ccfe0cc8217804b53eefe099c6cde2f18db2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v1-72b/6d98f0fa-25c9-409b-b82e-b3c128bf47b6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v1-72b/1762652580.0112262", - "retrieved_timestamp": "1762652580.011227", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "anthracite-org/magnum-v1-72b", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v1-72b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7606484128778308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6982215794373214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4489375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5486203457446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No 
newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2-12b/72821a7d-cc27-4557-82d4-7e30286ea126.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2-12b/72821a7d-cc27-4557-82d4-7e30286ea126.json deleted file mode 100644 index c6406bce48723e6529e9394e13d63882304366d3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2-12b/72821a7d-cc27-4557-82d4-7e30286ea126.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v2-12b/1762652580.011473", - "retrieved_timestamp": "1762652580.011474", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "anthracite-org/magnum-v2-12b", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v2-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.376166349729828 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5020864013200114 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41790625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31673869680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2-72b/31d80ab1-348f-4b5a-963e-f027adf32101.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2-72b/31d80ab1-348f-4b5a-963e-f027adf32101.json deleted file mode 100644 index 7171ebe2cc702db71e6cabec65d0f6aa9e576842..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2-72b/31d80ab1-348f-4b5a-963e-f027adf32101.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/anthracite-org_magnum-v2-72b/1762652580.01168", - "retrieved_timestamp": "1762652580.01168", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "anthracite-org/magnum-v2-72b", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v2-72b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7560273407891063 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7005076514129516 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3542296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5456283244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2.5-12b-kto/74e67572-01d9-4890-9c5a-27b5559cf752.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2.5-12b-kto/74e67572-01d9-4890-9c5a-27b5559cf752.json deleted file mode 100644 index 6e6ed599185531ac424947eaf58c123f52c68a64..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2.5-12b-kto/74e67572-01d9-4890-9c5a-27b5559cf752.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v2.5-12b-kto/1762652580.011887", - "retrieved_timestamp": "1762652580.011888", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "anthracite-org/magnum-v2.5-12b-kto", - "developer": "anthracite-org", - 
"inference_platform": "unknown", - "id": "anthracite-org/magnum-v2.5-12b-kto" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3865576669902525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5076961186254344 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40863541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3214760638297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-27b-kto/9a74a1f1-0322-4f96-8e52-76bbde948fa9.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-27b-kto/9a74a1f1-0322-4f96-8e52-76bbde948fa9.json deleted file mode 100644 index d594f30fbc6585c675fcc4962a9b5a8f390dd6ff..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-27b-kto/9a74a1f1-0322-4f96-8e52-76bbde948fa9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-27b-kto/1762652580.012144", - "retrieved_timestamp": "1762652580.0121448", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "anthracite-org/magnum-v3-27b-kto", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v3-27b-kto" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5674831668860845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.586040577894583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18126888217522658 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38546874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42378656914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-34b/8ace78d5-5390-49ec-935d-2c7faf7569ca.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-34b/8ace78d5-5390-49ec-935d-2c7faf7569ca.json deleted file mode 100644 index 55c2d45a1c2f78f4e821c862b7537d4adc49b4c4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-34b/8ace78d5-5390-49ec-935d-2c7faf7569ca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-34b/1762652580.012352", - "retrieved_timestamp": "1762652580.012352", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "anthracite-org/magnum-v3-34b", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v3-34b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5115294086357531 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6087828692085228 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.36073825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47523271276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-9b-chatml/42df1809-0021-4968-a18b-86cefc0125d7.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-9b-chatml/42df1809-0021-4968-a18b-86cefc0125d7.json deleted file mode 100644 index 3286d66f609439793ad22410d372d8b5b49ebd64..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-9b-chatml/42df1809-0021-4968-a18b-86cefc0125d7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-9b-chatml/1762652580.0125592", - "retrieved_timestamp": "1762652580.0125592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "anthracite-org/magnum-v3-9b-chatml", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v3-9b-chatml" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12747066671985885 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5427688488887096 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4432291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4242021276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-12b/c7ba8947-fd38-4ba1-9169-6c9164123273.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-12b/c7ba8947-fd38-4ba1-9169-6c9164123273.json deleted file mode 100644 index fb37b306aa09b1cf23957e6dc840422caac4b04e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-12b/c7ba8947-fd38-4ba1-9169-6c9164123273.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-12b/1762652580.013016", - "retrieved_timestamp": "1762652580.013016", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "anthracite-org/magnum-v4-12b", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v4-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33929640021808805 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5176693046591915 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40928125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3603723404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-22b/5e3f808c-964d-492d-a003-37594dd36f89.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-22b/5e3f808c-964d-492d-a003-37594dd36f89.json deleted file mode 100644 index 784a61915ffe4f547a111fa57b71ba5ab697bfc4..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-22b/5e3f808c-964d-492d-a003-37594dd36f89.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-22b/1762652580.013223", - "retrieved_timestamp": "1762652580.013224", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "anthracite-org/magnum-v4-22b", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v4-22b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5628620947973599 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.548612004937422 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2001510574018127 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44078124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3829787234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-27b/113ce0c6-c292-4924-adca-afdbcdd4c381.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-27b/113ce0c6-c292-4924-adca-afdbcdd4c381.json deleted file mode 100644 index 6c2caa3b15d173fd227375581940017c82ca62e1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-27b/113ce0c6-c292-4924-adca-afdbcdd4c381.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-27b/1762652580.013432", - "retrieved_timestamp": "1762652580.013433", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "anthracite-org/magnum-v4-27b", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v4-27b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34541682735142754 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5867298109891389 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4379895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43758311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-9b/55401aa6-ad61-42d6-9163-5d105a9091bf.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-9b/55401aa6-ad61-42d6-9163-5d105a9091bf.json deleted file mode 100644 index 10772be33cd10dd4e008cf2c7e7befc4148f80d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-9b/55401aa6-ad61-42d6-9163-5d105a9091bf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-9b/1762652580.013639", - "retrieved_timestamp": "1762652580.013639", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "anthracite-org/magnum-v4-9b", - "developer": "anthracite-org", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v4-9b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3502628581053826 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5336423991931557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45157291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3952792553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/anthropic/xxx777xxxASD_L3.1-ClaudeMaid-4x8B/ae6d070b-71de-40c3-8f69-944ce2e33abb.json b/leaderboard_data/HFOpenLLMv2/anthropic/xxx777xxxASD_L3.1-ClaudeMaid-4x8B/ae6d070b-71de-40c3-8f69-944ce2e33abb.json deleted file mode 100644 index a9b8f8176be08a31da5a6ce417d3a30a531d05a9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/anthropic/xxx777xxxASD_L3.1-ClaudeMaid-4x8B/ae6d070b-71de-40c3-8f69-944ce2e33abb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xxx777xxxASD_L3.1-ClaudeMaid-4x8B/1762652580.602767", - "retrieved_timestamp": "1762652580.602768", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xxx777xxxASD/L3.1-ClaudeMaid-4x8B", - "developer": "anthropic", - "inference_platform": "unknown", - "id": "xxx777xxxASD/L3.1-ClaudeMaid-4x8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6696487541944263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5070848048063867 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42893749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35804521276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.942 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/apple/apple_DCLM-7B/3891ad0a-0acf-4d3e-a9e8-533633d9557a.json b/leaderboard_data/HFOpenLLMv2/apple/apple_DCLM-7B/3891ad0a-0acf-4d3e-a9e8-533633d9557a.json deleted file mode 100644 index 2f8794dc36476ecd5f43481152a4d9bd01da8fb8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/apple/apple_DCLM-7B/3891ad0a-0acf-4d3e-a9e8-533633d9557a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/apple_DCLM-7B/1762652580.0138528", - "retrieved_timestamp": "1762652580.013854", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "apple/DCLM-7B", - "developer": "apple", - "inference_platform": "unknown", - "id": "apple/DCLM-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21727239280664196 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42321423668184166 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3920729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3110871010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "OpenLMModel", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/appvoid/appvoid_arco-2-instruct/95d1d5d9-b613-46b4-b0de-540641d8d81a.json b/leaderboard_data/HFOpenLLMv2/appvoid/appvoid_arco-2-instruct/95d1d5d9-b613-46b4-b0de-540641d8d81a.json deleted file mode 100644 index 1290f6154bb66918bd21ce8bf564391146f0529c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/appvoid/appvoid_arco-2-instruct/95d1d5d9-b613-46b4-b0de-540641d8d81a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/appvoid_arco-2-instruct/1762652580.014716", - "retrieved_timestamp": "1762652580.0147169", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "appvoid/arco-2-instruct", - "developer": "appvoid", - "inference_platform": "unknown", - "id": "appvoid/arco-2-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2164479137577184 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31330470624451107 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23825503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34959375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11128656914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.514 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/appvoid/appvoid_arco-2/a037593c-0f98-4b23-a139-12cfc435de3c.json b/leaderboard_data/HFOpenLLMv2/appvoid/appvoid_arco-2/a037593c-0f98-4b23-a139-12cfc435de3c.json deleted file mode 100644 index f3456f654e15872ad6a9c42c69ca6589bb58a60c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/appvoid/appvoid_arco-2/a037593c-0f98-4b23-a139-12cfc435de3c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/appvoid_arco-2/1762652580.014345", - "retrieved_timestamp": "1762652580.014347", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "appvoid/arco-2", - "developer": "appvoid", - "inference_platform": "unknown", - "id": "appvoid/arco-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19913717824261848 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31456676274830814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35359375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1116190159574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.514 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Blitz/01e8e033-1aa9-42e2-85d8-b7974d0c9e23.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Blitz/01e8e033-1aa9-42e2-85d8-b7974d0c9e23.json deleted file mode 100644 index 0e3cd2d5ca4e1e4652c3483c42cdf372541ce53a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Blitz/01e8e033-1aa9-42e2-85d8-b7974d0c9e23.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Blitz/1762652580.0149639", - "retrieved_timestamp": "1762652580.014965", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "arcee-ai/Arcee-Blitz", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Arcee-Blitz" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5543435861292482 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6606628431550884 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34818731117824775 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50471875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6153590425531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Maestro-7B-Preview/126f5eda-1529-450f-8557-dcd6a33b7bd4.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Maestro-7B-Preview/126f5eda-1529-450f-8557-dcd6a33b7bd4.json deleted file mode 100644 index 170c6167a734fc0dc31da6616a2989e3a6b601f1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Maestro-7B-Preview/126f5eda-1529-450f-8557-dcd6a33b7bd4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Maestro-7B-Preview/1762652580.015253", - "retrieved_timestamp": "1762652580.015254", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "arcee-ai/Arcee-Maestro-7B-Preview", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Arcee-Maestro-7B-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2750247122080524 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4648373015709704 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": 
"Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49924471299093653 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3885416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3039394946808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Nova/9063608f-8d32-4e98-ad05-621f6239d0ba.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Nova/9063608f-8d32-4e98-ad05-621f6239d0ba.json deleted file mode 100644 index 6536419adce18ba294f9d2cc68cb6290fe3c94e8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Nova/9063608f-8d32-4e98-ad05-621f6239d0ba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Nova/1762652580.0154781", - "retrieved_timestamp": "1762652580.015479", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "arcee-ai/Arcee-Nova", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Arcee-Nova" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7907485471881275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.694196965855899 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.45616666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5452127659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Spark/1dde2278-39aa-43cf-8d94-5d4a0bb514ca.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Spark/1dde2278-39aa-43cf-8d94-5d4a0bb514ca.json deleted file mode 100644 index 9cb008bd1ed032e8c5b6d039cfa6425c18f3a45b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Spark/1dde2278-39aa-43cf-8d94-5d4a0bb514ca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Spark/1762652580.0159192", - "retrieved_timestamp": "1762652580.0159202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "arcee-ai/Arcee-Spark", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Arcee-Spark" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.571829412625168 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5480864114714127 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4007604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38131648936170215 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Spark/84a51879-cd67-449b-ace0-f87cccd6ea8c.json 
b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Spark/84a51879-cd67-449b-ace0-f87cccd6ea8c.json deleted file mode 100644 index fc8bf883871e987625dc1d280204c4b152124d39..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Spark/84a51879-cd67-449b-ace0-f87cccd6ea8c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Spark/1762652580.015698", - "retrieved_timestamp": "1762652580.015699", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "arcee-ai/Arcee-Spark", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Arcee-Spark" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5620874834328471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5489474198567446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29531722054380666 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40209374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3822307180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_SuperNova-Medius/7e0e8ab9-a90b-4f0e-8e0a-eeceac12a4a1.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_SuperNova-Medius/7e0e8ab9-a90b-4f0e-8e0a-eeceac12a4a1.json deleted file mode 100644 index b2de2bb6e19fd81ff2de24b3359ed31aecad0f95..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_SuperNova-Medius/7e0e8ab9-a90b-4f0e-8e0a-eeceac12a4a1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/arcee-ai_SuperNova-Medius/1762652580.016611", - "retrieved_timestamp": "1762652580.016612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "arcee-ai/SuperNova-Medius", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/SuperNova-Medius" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7183584001560305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6377284463115707 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4690332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4232708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5034906914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Lite/62afba84-9929-4882-843e-3f7db7b030a3.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Lite/62afba84-9929-4882-843e-3f7db7b030a3.json deleted file mode 100644 index 3700230a541934936e8cb378702255986768bb10..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Lite/62afba84-9929-4882-843e-3f7db7b030a3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/arcee-ai_Virtuoso-Lite/1762652580.0168262", - "retrieved_timestamp": "1762652580.0168269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "arcee-ai/Virtuoso-Lite", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Virtuoso-Lite" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.8099575792231279 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6098520975127147 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25302114803625375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4595416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4440658244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Small-v2/325cf0a5-6a72-466a-8e1e-531f03db6083.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Small-v2/325cf0a5-6a72-466a-8e1e-531f03db6083.json deleted file mode 100644 index 31dfe246ef26e483e1cdcd6c4537d5b7281467cc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Small-v2/325cf0a5-6a72-466a-8e1e-531f03db6083.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/arcee-ai_Virtuoso-Small-v2/1762652580.0172758", - "retrieved_timestamp": "1762652580.017277", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "arcee-ai/Virtuoso-Small-v2", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Virtuoso-Small-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8273181824226385 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6554097094586643 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.466012084592145 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43133333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.518783244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Small/cc51c0e0-4e5d-496c-bf02-8b5d8f474cd3.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Small/cc51c0e0-4e5d-496c-bf02-8b5d8f474cd3.json deleted file mode 100644 index 0c1451effe3474baed14ebf90c499a051afc360e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Small/cc51c0e0-4e5d-496c-bf02-8b5d8f474cd3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/arcee-ai_Virtuoso-Small/1762652580.017056", - "retrieved_timestamp": "1762652580.017057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "arcee-ai/Virtuoso-Small", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/Virtuoso-Small" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7935211904413622 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6517633129454784 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4093655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43390625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5191156914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_raspberry-3B/cef8c893-a903-4e30-b7e1-5f2fe8f2ac82.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_raspberry-3B/cef8c893-a903-4e30-b7e1-5f2fe8f2ac82.json deleted file mode 100644 index 395b28f0f819c47cb7e874bc19cac58f93c8ea4d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_raspberry-3B/cef8c893-a903-4e30-b7e1-5f2fe8f2ac82.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/arcee-ai_raspberry-3B/1762652580.017479", - "retrieved_timestamp": "1762652580.017479", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "arcee-ai/raspberry-3B", - "developer": "arcee-ai", - "inference_platform": "unknown", - "id": "arcee-ai/raspberry-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31541642840995227 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42689280188827033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41232291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.285405585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/argilla/argilla_notus-7b-v1/c06f66ea-d9e3-4902-b3fd-188110f9c1e4.json b/leaderboard_data/HFOpenLLMv2/argilla/argilla_notus-7b-v1/c06f66ea-d9e3-4902-b3fd-188110f9c1e4.json deleted file mode 100644 index d736355e2531573ffaa45e07dc0af56fdbb575f5..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/argilla/argilla_notus-7b-v1/c06f66ea-d9e3-4902-b3fd-188110f9c1e4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/argilla_notus-7b-v1/1762652580.017684", - "retrieved_timestamp": "1762652580.017685", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "argilla/notus-7b-v1", - "developer": "argilla", - "inference_platform": "unknown", - "id": "argilla/notus-7b-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.508207112683236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4511857407381495 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33641666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3003656914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/argilla/argilla_notux-8x7b-v1/60185907-11c2-454c-bfbc-3c5741651ab7.json b/leaderboard_data/HFOpenLLMv2/argilla/argilla_notux-8x7b-v1/60185907-11c2-454c-bfbc-3c5741651ab7.json deleted file mode 100644 index 72b15cf4e7c6bd1ade9554ed5c0fa54f4f683966..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/argilla/argilla_notux-8x7b-v1/60185907-11c2-454c-bfbc-3c5741651ab7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/argilla_notux-8x7b-v1/1762652580.017979", - "retrieved_timestamp": "1762652580.0179799", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"argilla/notux-8x7b-v1", - "developer": "argilla", - "inference_platform": "unknown", - "id": "argilla/notux-8x7b-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5422290633297429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5363304164516353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41759375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3660239361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/arisin/arisin_orca-platypus-13B-slerp/ecd45b21-21f7-49e2-b314-c7b678bdc8c1.json b/leaderboard_data/HFOpenLLMv2/arisin/arisin_orca-platypus-13B-slerp/ecd45b21-21f7-49e2-b314-c7b678bdc8c1.json deleted file mode 100644 index 9d8d47f754c02f8ddd0e249db3231159297a8035..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/arisin/arisin_orca-platypus-13B-slerp/ecd45b21-21f7-49e2-b314-c7b678bdc8c1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/arisin_orca-platypus-13B-slerp/1762652580.018446", - "retrieved_timestamp": "1762652580.018446", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "arisin/orca-platypus-13B-slerp", - "developer": "arisin", - "inference_platform": "unknown", - "id": "arisin/orca-platypus-13B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26718107953563214 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.46306234976954946 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4253125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2592253989361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/arshiaafshani/arshiaafshani_Arsh-V1/6f40503d-59ee-4cdc-a697-ef405d9644a7.json b/leaderboard_data/HFOpenLLMv2/arshiaafshani/arshiaafshani_Arsh-V1/6f40503d-59ee-4cdc-a697-ef405d9644a7.json deleted file mode 100644 index 36e941a23c9f4ca6f6b22e78266fb20a788df8e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/arshiaafshani/arshiaafshani_Arsh-V1/6f40503d-59ee-4cdc-a697-ef405d9644a7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/arshiaafshani_Arsh-V1/1762652580.0186949", - "retrieved_timestamp": "1762652580.0186958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "arshiaafshani/Arsh-V1", - "developer": "arshiaafshani", - "inference_platform": "unknown", - "id": "arshiaafshani/Arsh-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6043276284702368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6739657491720434 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2620845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48989583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5256815159574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ashercn97/ashercn97_a1-v0.0.1/a9e3fe74-400c-444c-9b28-6f49c6671f96.json b/leaderboard_data/HFOpenLLMv2/ashercn97/ashercn97_a1-v0.0.1/a9e3fe74-400c-444c-9b28-6f49c6671f96.json deleted file mode 100644 index 119362002823d32a4606d48ad69ab2c0f07aee4d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ashercn97/ashercn97_a1-v0.0.1/a9e3fe74-400c-444c-9b28-6f49c6671f96.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ashercn97_a1-v0.0.1/1762652580.019211", - "retrieved_timestamp": "1762652580.019212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ashercn97/a1-v0.0.1", - "developer": "ashercn97", - "inference_platform": "unknown", - "id": "ashercn97/a1-v0.0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21984445715146922 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5188122863232913 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4119791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41647273936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/ashercn97/ashercn97_a1-v002/509c2895-70ae-4381-94ef-f6cdf9ee07ef.json b/leaderboard_data/HFOpenLLMv2/ashercn97/ashercn97_a1-v002/509c2895-70ae-4381-94ef-f6cdf9ee07ef.json deleted file mode 100644 index 38599401c114e084f0b3cff79b36fff9bc9a41c3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ashercn97/ashercn97_a1-v002/509c2895-70ae-4381-94ef-f6cdf9ee07ef.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ashercn97_a1-v002/1762652580.019455", - "retrieved_timestamp": "1762652580.019456", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ashercn97/a1-v002", - "developer": "ashercn97", - "inference_platform": "unknown", - "id": "ashercn97/a1-v002" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2584631001298776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5261137844506322 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41591666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41747007978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/assskelad/assskelad_smollm2-360M-sft_SmallThoughts/ce2f5cc8-a187-454d-ba99-4446d29aab7c.json b/leaderboard_data/HFOpenLLMv2/assskelad/assskelad_smollm2-360M-sft_SmallThoughts/ce2f5cc8-a187-454d-ba99-4446d29aab7c.json deleted file mode 100644 index 7b71bd5e9f94ccf6c07d6dd00ed67cd03166f187..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/assskelad/assskelad_smollm2-360M-sft_SmallThoughts/ce2f5cc8-a187-454d-ba99-4446d29aab7c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/assskelad_smollm2-360M-sft_SmallThoughts/1762652580.019667", - "retrieved_timestamp": 
"1762652580.0196679", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "assskelad/smollm2-360M-sft_SmallThoughts", - "developer": "assskelad", - "inference_platform": "unknown", - "id": "assskelad/smollm2-360M-sft_SmallThoughts" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20071078072846715 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3149572469619188 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3395208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11818484042553191 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/athirdpath/athirdpath_Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/9255090f-6862-4ff1-ac91-fe0cd7613445.json b/leaderboard_data/HFOpenLLMv2/athirdpath/athirdpath_Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/9255090f-6862-4ff1-ac91-fe0cd7613445.json deleted file mode 100644 index 854c012b84c77e05c2d53ee0ca5e04891c6ac057..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/athirdpath/athirdpath_Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/9255090f-6862-4ff1-ac91-fe0cd7613445.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/athirdpath_Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/1762652580.019914", - "retrieved_timestamp": "1762652580.019914", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit", - "developer": "athirdpath", - "inference_platform": "unknown", - "id": "athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4521037513796726 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4939066588253951 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3863958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564660904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/automerger/automerger_YamshadowExperiment28-7B/1fa5dee9-c360-40d9-8e67-9b415cd36616.json b/leaderboard_data/HFOpenLLMv2/automerger/automerger_YamshadowExperiment28-7B/1fa5dee9-c360-40d9-8e67-9b415cd36616.json deleted file mode 100644 index 24a115bfe3d3784b36bd8b0e16576ca6e2ee9b67..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/automerger/automerger_YamshadowExperiment28-7B/1fa5dee9-c360-40d9-8e67-9b415cd36616.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/automerger_YamshadowExperiment28-7B/1762652580.020166", - "retrieved_timestamp": "1762652580.0201669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "automerger/YamshadowExperiment28-7B", - "developer": "automerger", - "inference_platform": "unknown", - "id": "automerger/YamshadowExperiment28-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070156074770498 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5150030227855061 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4306145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30601728723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/avemio/avemio_GRAG-NEMO-12B-ORPO-HESSIAN-AI/45cc7b31-3f75-42f7-9b07-3cf704fd2b55.json b/leaderboard_data/HFOpenLLMv2/avemio/avemio_GRAG-NEMO-12B-ORPO-HESSIAN-AI/45cc7b31-3f75-42f7-9b07-3cf704fd2b55.json deleted file mode 100644 index bd276659edf858fe751cd06c17b236392b91bb2d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/avemio/avemio_GRAG-NEMO-12B-ORPO-HESSIAN-AI/45cc7b31-3f75-42f7-9b07-3cf704fd2b55.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/avemio_GRAG-NEMO-12B-ORPO-HESSIAN-AI/1762652580.020413", - "retrieved_timestamp": "1762652580.0204139", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI", - "developer": "avemio", - "inference_platform": "unknown", - "id": "avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26065954545866094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3446666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10605053191489362 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/baconnier/baconnier_Napoleon_24B_V0.0/88fb101e-35dd-40af-922f-9b66a2711249.json b/leaderboard_data/HFOpenLLMv2/baconnier/baconnier_Napoleon_24B_V0.0/88fb101e-35dd-40af-922f-9b66a2711249.json deleted file mode 100644 index b655eb995dbc57cbc77b051891d479d69b8a90c0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/baconnier/baconnier_Napoleon_24B_V0.0/88fb101e-35dd-40af-922f-9b66a2711249.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/baconnier_Napoleon_24B_V0.0/1762652580.0222468", - "retrieved_timestamp": "1762652580.022248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "baconnier/Napoleon_24B_V0.0", - "developer": "baconnier", - "inference_platform": "unknown", - "id": "baconnier/Napoleon_24B_V0.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1801021290176731 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6367110843973786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22734138972809667 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4419895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5039893617021277 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/baconnier/baconnier_Napoleon_24B_V0.2/4857d2d0-1a4b-4544-8b1e-fb4b01618a3b.json b/leaderboard_data/HFOpenLLMv2/baconnier/baconnier_Napoleon_24B_V0.2/4857d2d0-1a4b-4544-8b1e-fb4b01618a3b.json deleted file mode 100644 index 2de000b54abc1bb4419a6c7f3aea5add9ee37ed9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/baconnier/baconnier_Napoleon_24B_V0.2/4857d2d0-1a4b-4544-8b1e-fb4b01618a3b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/baconnier_Napoleon_24B_V0.2/1762652580.022489", - "retrieved_timestamp": "1762652580.022489", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "baconnier/Napoleon_24B_V0.2", - "developer": "baconnier", - "inference_platform": "unknown", - "id": "baconnier/Napoleon_24B_V0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2527172347150006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5910621269874454 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4459583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4356715425531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/baebee/baebee_7B-Cetacea/5985fed7-9c54-458d-8f64-533e248a38da.json b/leaderboard_data/HFOpenLLMv2/baebee/baebee_7B-Cetacea/5985fed7-9c54-458d-8f64-533e248a38da.json deleted file mode 100644 index b5fb9fefb563617455a6ad1fd1029140002463a6..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/baebee/baebee_7B-Cetacea/5985fed7-9c54-458d-8f64-533e248a38da.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/baebee_7B-Cetacea/1762652580.022699", - "retrieved_timestamp": "1762652580.022699", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "baebee/7B-Cetacea", - "developer": "baebee", - "inference_platform": "unknown", - "id": "baebee/7B-Cetacea" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5278660620486975 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4757171853895546 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41362499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2954621010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/baebee/baebee_mergekit-model_stock-nzjnheg/e847afb0-c8ac-4cce-b0f9-1667c9fbef3c.json b/leaderboard_data/HFOpenLLMv2/baebee/baebee_mergekit-model_stock-nzjnheg/e847afb0-c8ac-4cce-b0f9-1667c9fbef3c.json deleted file mode 100644 index f360bc36ace9744296d5a1fd42d1e74f44ac3fa3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/baebee/baebee_mergekit-model_stock-nzjnheg/e847afb0-c8ac-4cce-b0f9-1667c9fbef3c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/baebee_mergekit-model_stock-nzjnheg/1762652580.022936", - "retrieved_timestamp": "1762652580.022937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "baebee/mergekit-model_stock-nzjnheg", - "developer": "baebee", - "inference_platform": "unknown", - "id": "baebee/mergekit-model_stock-nzjnheg" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48442687624392167 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5287391310729729 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16767371601208458 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38466666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699301861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/baebee/baebee_mergekit-ties-fnjenli/21b3d7d0-301d-431d-9cfc-a0ad1e326f03.json b/leaderboard_data/HFOpenLLMv2/baebee/baebee_mergekit-ties-fnjenli/21b3d7d0-301d-431d-9cfc-a0ad1e326f03.json deleted file mode 100644 index 8b09397de01e1f4499888101cc442c0d0a223b2a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/baebee/baebee_mergekit-ties-fnjenli/21b3d7d0-301d-431d-9cfc-a0ad1e326f03.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/baebee_mergekit-ties-fnjenli/1762652580.0231512", - "retrieved_timestamp": "1762652580.023152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "baebee/mergekit-ties-fnjenli", - "developer": "baebee", - "inference_platform": "unknown", - "id": "baebee/mergekit-ties-fnjenli" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19881248420856662 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30236959112076134 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.002265861027190332 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4019375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11286569148936171 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.1v/ae256440-486f-43cf-b4a3-8d5c0ff196c9.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.1v/ae256440-486f-43cf-b4a3-8d5c0ff196c9.json deleted file mode 100644 index 4e2f8cb3827c7a54a9e7e2ea15d555f4a8f36850..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.1v/ae256440-486f-43cf-b4a3-8d5c0ff196c9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.1v/1762652580.023659", - "retrieved_timestamp": "1762652580.023659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.1v", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.1v" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36362628935668473 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5436022524587655 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41315624999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3673537234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.2v/d509b0d3-a043-4057-bf80-37ec5ceedeed.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.2v/d509b0d3-a043-4057-bf80-37ec5ceedeed.json deleted file mode 100644 index 324dfa648d7fe80bf583fd3329b8018391a79b3a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.2v/d509b0d3-a043-4057-bf80-37ec5ceedeed.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.2v/1762652580.023869", - "retrieved_timestamp": "1762652580.02387", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.2v", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.2v" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3623773809048879 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5434355857920987 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41582291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.36627327127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.3v/8e2e1f2f-4715-4b8b-b641-d5e552500408.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.3v/8e2e1f2f-4715-4b8b-b641-d5e552500408.json deleted file mode 100644 index dd61b416c2631612833980d04700d071bc7cbebe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.3v/8e2e1f2f-4715-4b8b-b641-d5e552500408.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.3v/1762652580.02432", - "retrieved_timestamp": "1762652580.024322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.3v", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.3v" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38698209639312575 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431389316665282 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41312499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3663563829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.4v/4072cc72-b6b4-4a5d-8f01-f9f8437ea569.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.4v/4072cc72-b6b4-4a5d-8f01-f9f8437ea569.json deleted file mode 100644 index 
be9a70e8728b3885dc322bc337068842bd710221..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.4v/4072cc72-b6b4-4a5d-8f01-f9f8437ea569.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.4v/1762652580.024673", - "retrieved_timestamp": "1762652580.024674", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.4v", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.4v" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6508142838778884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5094241395384186 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1351963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41762499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36826795212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.5v/fa2e9cff-4a7b-4efd-98ca-b8fd2cb33928.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.5v/fa2e9cff-4a7b-4efd-98ca-b8fd2cb33928.json deleted file mode 100644 index fdc5ac35e2b2c88da6248c38f6a780384bc9f642..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.5v/fa2e9cff-4a7b-4efd-98ca-b8fd2cb33928.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.5v/1762652580.0249128", - "retrieved_timestamp": "1762652580.024914", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.5v", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.5v" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3745672593163916 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5421932988679541 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41315624999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36610704787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.6v/a58c4863-e5a9-425d-ad3e-5924d6146718.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.6v/a58c4863-e5a9-425d-ad3e-5924d6146718.json deleted file mode 100644 index 034b074cbaa9fb20b1328f13d34b78389e956a86..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.6v/a58c4863-e5a9-425d-ad3e-5924d6146718.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.6v/1762652580.025138", - "retrieved_timestamp": "1762652580.0251389", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.6v", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.6v" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43656608908806416 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5448909065942131 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3661901595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_III_IV_V/c2e334b3-e82d-40bb-a6ed-9a941bf2352a.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_III_IV_V/c2e334b3-e82d-40bb-a6ed-9a941bf2352a.json deleted file mode 100644 index e2ff6f620ab8e77e5bec3d987bce06f6905b1899..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_III_IV_V/c2e334b3-e82d-40bb-a6ed-9a941bf2352a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_III_IV_V/1762652580.0253649", - "retrieved_timestamp": "1762652580.025366", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40309379114083965 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.54645347832278 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41982291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3664394946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_III_ex_V/6f31292a-b09f-4e2c-ae3c-b093c5ba06c6.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_III_ex_V/6f31292a-b09f-4e2c-ae3c-b093c5ba06c6.json deleted file mode 100644 index e9ffa7b59a42a25c4a8cbab4ebfa49d496799d9f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_III_ex_V/6f31292a-b09f-4e2c-ae3c-b093c5ba06c6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_III_ex_V/1762652580.025593", - "retrieved_timestamp": "1762652580.025593", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43162032296528763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5448926891254073 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648603723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_Neo/089a5215-70a4-4255-ac01-1b70d4e8a494.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_Neo/089a5215-70a4-4255-ac01-1b70d4e8a494.json deleted file mode 100644 index 1b433a24f6efc5e6b103aaacd746387997df2070..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_Neo/089a5215-70a4-4255-ac01-1b70d4e8a494.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_Neo/1762652580.0258071", - "retrieved_timestamp": "1762652580.0258079", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B-Mix_Neo", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B-Mix_Neo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6249606599378538 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5077574728717519 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41502083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36851728723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff 
--git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B/49ec948c-c06d-4c01-be83-9f74ed15ea17.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B/49ec948c-c06d-4c01-be83-9f74ed15ea17.json deleted file mode 100644 index 47ad61c32c69c77657a951c6b5576e58158e225c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B/49ec948c-c06d-4c01-be83-9f74ed15ea17.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B/1762652580.02337", - "retrieved_timestamp": "1762652580.02337", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/MISCHIEVOUS-12B", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/MISCHIEVOUS-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3851835352420466 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5404981575206657 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3671875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_NameLess-12B-prob/81670e41-16d6-43a6-9af9-6924a52a8300.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_NameLess-12B-prob/81670e41-16d6-43a6-9af9-6924a52a8300.json deleted file mode 100644 index 848eb9c3e29af398a749ed46784cbbed7c775649..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_NameLess-12B-prob/81670e41-16d6-43a6-9af9-6924a52a8300.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_NameLess-12B-prob/1762652580.026292", - 
"retrieved_timestamp": "1762652580.026293", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/NameLess-12B-prob", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/NameLess-12B-prob" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6602315190361574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5158141019151304 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684341755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-0.1v/2d468a71-7364-40eb-8a98-1dbac956b3cf.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-0.1v/2d468a71-7364-40eb-8a98-1dbac956b3cf.json deleted file mode 100644 index 09777d04a12f8880039a09148107ce6c88568300..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-0.1v/2d468a71-7364-40eb-8a98-1dbac956b3cf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-0.1v/1762652580.026718", - "retrieved_timestamp": "1762652580.026719", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-0.1v", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-0.1v" - }, - "evaluation_results": [ - { 
- "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36574954454181574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5412276004529172 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41582291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36826795212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-0.X.ver/d0c92f20-72d0-431c-b8ba-881b3a6ae158.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-0.X.ver/d0c92f20-72d0-431c-b8ba-881b3a6ae158.json deleted file mode 100644 index 061f431bc4df59424ec0058099fcfb85762fb214..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-0.X.ver/d0c92f20-72d0-431c-b8ba-881b3a6ae158.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-0.X.ver/1762652580.0269299", - "retrieved_timestamp": "1762652580.0269299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-0.X.ver", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-0.X.ver" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37756486123485683 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.541624689936422 - } - }, - { - "evaluation_name": "MATH Level 
5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41982291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36710438829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-ALPHA/0053cf6a-0e1e-49c5-8d0a-b3d7254e22f3.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-ALPHA/0053cf6a-0e1e-49c5-8d0a-b3d7254e22f3.json deleted file mode 100644 index 9e4b4a2bef67c65005f8f09cfa57d5e50fed705e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-ALPHA/0053cf6a-0e1e-49c5-8d0a-b3d7254e22f3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-ALPHA/1762652580.0271401", - "retrieved_timestamp": "1762652580.027141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-ALPHA", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-ALPHA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6365011502812536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5093679898057982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4202916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3696808510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-BETA/2f023511-2446-48f8-83e5-47225f15e905.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-BETA/2f023511-2446-48f8-83e5-47225f15e905.json deleted file mode 100644 index ee84f26e18a1026e770a557c9524eca3b48ef938..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-BETA/2f023511-2446-48f8-83e5-47225f15e905.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-BETA/1762652580.0273511", - "retrieved_timestamp": "1762652580.0273511", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-BETA", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-BETA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6720967034136092 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5155964285724085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4309895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36785239361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - 
"params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-DELTA/fcaf0de1-f4f5-4bfb-8276-29b3b1f5b5be.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-DELTA/fcaf0de1-f4f5-4bfb-8276-29b3b1f5b5be.json deleted file mode 100644 index 7e48cf62590e0f1eccf3b4651871d5fddaf3d7f9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-DELTA/fcaf0de1-f4f5-4bfb-8276-29b3b1f5b5be.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-DELTA/1762652580.027563", - "retrieved_timestamp": "1762652580.027563", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-DELTA", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-DELTA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6468924675416783 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5055418480543742 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13746223564954682 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40565625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3651097074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-DIGAMMA/67e74757-9950-499e-9258-7ccd20b29835.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-DIGAMMA/67e74757-9950-499e-9258-7ccd20b29835.json deleted file mode 100644 index fe5bcd44614e2ca868550692984e32e01b776ae4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-DIGAMMA/67e74757-9950-499e-9258-7ccd20b29835.json +++ /dev/null @@ -1,107 +0,0 @@ 
-{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-DIGAMMA/1762652580.027769", - "retrieved_timestamp": "1762652580.02777", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-DIGAMMA", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-DIGAMMA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6429207835210575 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.506116784464076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40965625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36585771276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-EPSILON/38864e75-9bb0-4eaa-ba87-c631838a9ad1.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-EPSILON/38864e75-9bb0-4eaa-ba87-c631838a9ad1.json deleted file mode 100644 index f794c78907ab614f6e27a84e344f0df65b928134..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-EPSILON/38864e75-9bb0-4eaa-ba87-c631838a9ad1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-EPSILON/1762652580.0279832", - "retrieved_timestamp": "1762652580.0279832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"bamec66557/VICIOUS_MESH-12B-EPSILON", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-EPSILON" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6304560787599126 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5037995611302296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4069895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36477726063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-GAMMA/4507a6c1-bfff-4e8d-92c6-7e923f74c4dc.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-GAMMA/4507a6c1-bfff-4e8d-92c6-7e923f74c4dc.json deleted file mode 100644 index bfd838bcd876fb93192d2b5b4f8f14e7fabc3bee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-GAMMA/4507a6c1-bfff-4e8d-92c6-7e923f74c4dc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-GAMMA/1762652580.028181", - "retrieved_timestamp": "1762652580.028182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-GAMMA", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-GAMMA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6361764562472019 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5181908355069679 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43632291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3666057180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-NEMO/6a9c649c-fbcd-489a-bc01-083014932a45.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-NEMO/6a9c649c-fbcd-489a-bc01-083014932a45.json deleted file mode 100644 index 3cacdb9e98e2ecca792ac1a153b2d081f45b2ca4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-NEMO/6a9c649c-fbcd-489a-bc01-083014932a45.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-NEMO/1762652580.028384", - "retrieved_timestamp": "1762652580.028385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-NEMO", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-NEMO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40221944440750546 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5441680901949261 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42506249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37159242021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-OMEGA/a630e843-ec9c-432b-986a-2b181c789507.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-OMEGA/a630e843-ec9c-432b-986a-2b181c789507.json deleted file mode 100644 index a7c8298259ffb24946351bbc4b95c95148722d33..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-OMEGA/a630e843-ec9c-432b-986a-2b181c789507.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-OMEGA/1762652580.028594", - "retrieved_timestamp": "1762652580.028594", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-OMEGA", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-OMEGA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6699734482284783 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.516644373777888 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43232291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.36768617021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-UNION/20d0e946-e7cf-48a6-a81e-f73d774e0e2b.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-UNION/20d0e946-e7cf-48a6-a81e-f73d774e0e2b.json deleted file mode 100644 index 871c3c355b38e939d203db70f4b3da5f771797d1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-UNION/20d0e946-e7cf-48a6-a81e-f73d774e0e2b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-UNION/1762652580.028806", - "retrieved_timestamp": "1762652580.028807", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B-UNION", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B-UNION" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6428709158366468 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5106643448765741 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13897280966767372 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4256875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3671875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B/f2ef86c9-e968-42e0-a0d0-1cf79f9c249b.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B/f2ef86c9-e968-42e0-a0d0-1cf79f9c249b.json deleted file mode 100644 index b840402a32532e60afbeefde252f03767b092858..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B/f2ef86c9-e968-42e0-a0d0-1cf79f9c249b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B/1762652580.026504", - "retrieved_timestamp": "1762652580.026504", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37156965739792636 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5436022524587655 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4104895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36785239361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B_Razor/950f6bff-e0ec-4556-85b7-81444008d1d4.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B_Razor/950f6bff-e0ec-4556-85b7-81444008d1d4.json deleted file mode 100644 index 925cada3344cf6ab91cd862ccfae2693c90118a0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B_Razor/950f6bff-e0ec-4556-85b7-81444008d1d4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B_Razor/1762652580.029016", - "retrieved_timestamp": "1762652580.029016", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/VICIOUS_MESH-12B_Razor", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/VICIOUS_MESH-12B_Razor" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37364304489864675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5447127693928118 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40915624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36685505319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_mergekit-model_stock-zdaysvi/8932da66-d29a-4453-9b61-bee48f1a28f1.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_mergekit-model_stock-zdaysvi/8932da66-d29a-4453-9b61-bee48f1a28f1.json deleted file mode 100644 index dabbce7beb54f9d8a0164653cc3e555cd888e2ab..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_mergekit-model_stock-zdaysvi/8932da66-d29a-4453-9b61-bee48f1a28f1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_mergekit-model_stock-zdaysvi/1762652580.029272", - "retrieved_timestamp": "1762652580.029272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/mergekit-model_stock-zdaysvi", - "developer": "bamec66557", - "inference_platform": "unknown", - "id": "bamec66557/mergekit-model_stock-zdaysvi" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
-        "score": 0.6425960894870055
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5062803896601668
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.1351963746223565
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.313758389261745
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.41238541666666667
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.36884973404255317
-      }
-    }
-  ],
-  "additional_details": {
-    "precision": "bfloat16",
-    "architecture": "MistralForCausalLM",
-    "params_billions": 6.124
-  }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_mergekit-ties-sinbkow/b8c00b3b-c35a-4511-965b-6096e9b116de.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_mergekit-ties-sinbkow/b8c00b3b-c35a-4511-965b-6096e9b116de.json
deleted file mode 100644
index f8d280c9ca95a85101d207871c841af2aac946f8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_mergekit-ties-sinbkow/b8c00b3b-c35a-4511-965b-6096e9b116de.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/bamec66557_mergekit-ties-sinbkow/1762652580.029482",
-  "retrieved_timestamp": "1762652580.029482",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "evaluation_source": {
-    "evaluation_source_name": "HF Open LLM v2",
-    "evaluation_source_type": "leaderboard"
-  },
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party"
-  },
-  "model_info": {
-    "name": "bamec66557/mergekit-ties-sinbkow",
-    "developer": "bamec66557",
-    "inference_platform": "unknown",
-    "id": "bamec66557/mergekit-ties-sinbkow"
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.6431956098706986
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.5092084289828543
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.14501510574018128
-      }
-    },
-    {
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40447916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36028922872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 6.124 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/belztjti/belztjti_dffghgjh/82b3c9ac-16bb-4fd0-8bed-af1ac598a424.json b/leaderboard_data/HFOpenLLMv2/belztjti/belztjti_dffghgjh/82b3c9ac-16bb-4fd0-8bed-af1ac598a424.json deleted file mode 100644 index 947c8cb4fe4d45109dd7ef3c7fccb63b0e91d46a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/belztjti/belztjti_dffghgjh/82b3c9ac-16bb-4fd0-8bed-af1ac598a424.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/belztjti_dffghgjh/1762652580.0296938", - "retrieved_timestamp": "1762652580.029695", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "belztjti/dffghgjh", - "developer": "belztjti", - "inference_platform": "unknown", - "id": "belztjti/dffghgjh" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5784241368457914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35817085768640783 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34745833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3421708776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GlmForCausalLM", - "params_billions": 9.543 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/belztjti/belztjti_dtfgv/655ea5ea-d94a-43eb-a4bf-182fd021d65a.json b/leaderboard_data/HFOpenLLMv2/belztjti/belztjti_dtfgv/655ea5ea-d94a-43eb-a4bf-182fd021d65a.json deleted file mode 100644 index a3f77aecf84f01bb52d7143f69b56d7ac65be339..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/belztjti/belztjti_dtfgv/655ea5ea-d94a-43eb-a4bf-182fd021d65a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/belztjti_dtfgv/1762652580.029931", - "retrieved_timestamp": "1762652580.029932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "belztjti/dtfgv", - "developer": "belztjti", - "inference_platform": "unknown", - "id": "belztjti/dtfgv" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334450369464133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32815316667476035 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15043218085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 9.543 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/beowolx/beowolx_CodeNinja-1.0-OpenChat-7B/fbe7d86c-8d1e-474a-bf85-35a139bdb08f.json b/leaderboard_data/HFOpenLLMv2/beowolx/beowolx_CodeNinja-1.0-OpenChat-7B/fbe7d86c-8d1e-474a-bf85-35a139bdb08f.json deleted file mode 100644 index bf43f33ec374f08f3069c74723ad7908fc00d514..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/beowolx/beowolx_CodeNinja-1.0-OpenChat-7B/fbe7d86c-8d1e-474a-bf85-35a139bdb08f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/beowolx_CodeNinja-1.0-OpenChat-7B/1762652580.030703", - "retrieved_timestamp": "1762652580.030704", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "beowolx/CodeNinja-1.0-OpenChat-7B", - "developer": "beowolx", - "inference_platform": "unknown", - "id": "beowolx/CodeNinja-1.0-OpenChat-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5446770125489258 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4441338669403703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42432291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3015292553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/berkeley-nest/berkeley-nest_Starling-LM-7B-alpha/ddc116b6-5b9a-409f-a0ab-09e5630d1289.json b/leaderboard_data/HFOpenLLMv2/berkeley-nest/berkeley-nest_Starling-LM-7B-alpha/ddc116b6-5b9a-409f-a0ab-09e5630d1289.json deleted file mode 100644 index b266e095dc6c94423fdb47753012c51c9f88124c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/berkeley-nest/berkeley-nest_Starling-LM-7B-alpha/ddc116b6-5b9a-409f-a0ab-09e5630d1289.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/berkeley-nest_Starling-LM-7B-alpha/1762652580.030957", - "retrieved_timestamp": "1762652580.0309582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "berkeley-nest/Starling-LM-7B-alpha", - "developer": "berkeley-nest", - "inference_platform": "unknown", - "id": "berkeley-nest/Starling-LM-7B-alpha" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5480491761858536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4440065261164004 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41201041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3171542553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_Gunny/e7d0c3d5-d962-49b5-a4b7-3cb7ac12735c.json b/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_Gunny/e7d0c3d5-d962-49b5-a4b7-3cb7ac12735c.json deleted file mode 100644 index da4dce67dcba7fada15104f000154f600b870ec4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_Gunny/e7d0c3d5-d962-49b5-a4b7-3cb7ac12735c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bfuzzy1_Gunny/1762652580.031208", - "retrieved_timestamp": "1762652580.031209", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bfuzzy1/Gunny", - "developer": "bfuzzy1", - "inference_platform": "unknown", - "id": "bfuzzy1/Gunny" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7128629813339716 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45459857092962414 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35828124999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3038563829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-c/71268c77-565a-401b-a51d-122060ed5945.json b/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-c/71268c77-565a-401b-a51d-122060ed5945.json deleted file mode 100644 index 91c18cdd9b2d806418648abc034977d8d120c307..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-c/71268c77-565a-401b-a51d-122060ed5945.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-c/1762652580.031654", - "retrieved_timestamp": "1762652580.0316548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bfuzzy1/acheron-c", - "developer": "bfuzzy1", - "inference_platform": "unknown", - "id": "bfuzzy1/acheron-c" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19286714805604685 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30260703404313577 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
-        "score": 0.24748322147651006
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.33821875
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.1171875
-      }
-    }
-  ],
-  "additional_details": {
-    "precision": "float16",
-    "architecture": "LlamaForCausalLM",
-    "params_billions": 0.514
-  }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-d/1c9ba45f-1f3b-42ad-a603-ea7039fee22e.json b/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-d/1c9ba45f-1f3b-42ad-a603-ea7039fee22e.json
deleted file mode 100644
index b7f366fb073ae982ed959790ac557ec80c5942e4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-d/1c9ba45f-1f3b-42ad-a603-ea7039fee22e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-d/1762652580.031856",
-  "retrieved_timestamp": "1762652580.031857",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "evaluation_source": {
-    "evaluation_source_name": "HF Open LLM v2",
-    "evaluation_source_type": "leaderboard"
-  },
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party"
-  },
-  "model_info": {
-    "name": "bfuzzy1/acheron-d",
-    "developer": "bfuzzy1",
-    "inference_platform": "unknown",
-    "id": "bfuzzy1/acheron-d"
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.192542454021995
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3139959864926003
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.015105740181268883
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.23657718120805368
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.34971875
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.11344747340425532
-      }
-    }
-  ],
-  "additional_details": {
-    "precision": "float16",
-    "architecture": "LlamaForCausalLM",
-    "params_billions": 0.514
-  }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-m/fdd707f8-df0b-4384-bc77-35f3fa8ec0a0.json b/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-m/fdd707f8-df0b-4384-bc77-35f3fa8ec0a0.json
deleted file mode 100644
index 30d3673028a36e9bdfc49951020317614a40ef6f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-m/fdd707f8-df0b-4384-bc77-35f3fa8ec0a0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-m/1762652580.032056",
-  "retrieved_timestamp": "1762652580.032057",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "evaluation_source": {
-    "evaluation_source_name": "HF Open LLM v2",
-    "evaluation_source_type": "leaderboard"
-  },
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party"
-  },
-  "model_info": {
-    "name": "bfuzzy1/acheron-m",
-    "developer": "bfuzzy1",
-    "inference_platform": "unknown",
-    "id": "bfuzzy1/acheron-m"
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.17583123889058808
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.29284447696551025
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.00906344410876133
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2600671140939597
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3486666666666667
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.11128656914893617
-      }
-    }
-  ],
-  "additional_details": {
-    "precision": "float16",
-    "architecture": "LlamaForCausalLM",
-    "params_billions": 0.514
-  }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron/2b74949a-c0a3-4061-8cf4-4330850af288.json b/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron/2b74949a-c0a3-4061-8cf4-4330850af288.json
deleted file mode 100644
index 97a3e12584fb681975e2710b1efd21c76d0497f1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron/2b74949a-c0a3-4061-8cf4-4330850af288.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron/1762652580.031447",
-  "retrieved_timestamp": "1762652580.031447",
-  "source_data": [
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bfuzzy1/acheron", - "developer": "bfuzzy1", - "inference_platform": "unknown", - "id": "bfuzzy1/acheron" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19831269919369493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3107918622526179 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3510520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10962433510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.514 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_llambses-1/3f04797b-fe6d-4cd5-a49e-b898a8db26a6.json b/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_llambses-1/3f04797b-fe6d-4cd5-a49e-b898a8db26a6.json deleted file mode 100644 index b55af96a5aa0b7cfa7f4f8c9eb072a383617082c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_llambses-1/3f04797b-fe6d-4cd5-a49e-b898a8db26a6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bfuzzy1_llambses-1/1762652580.032492", - "retrieved_timestamp": "1762652580.032493", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bfuzzy1/llambses-1", - "developer": "bfuzzy1", - "inference_platform": "unknown", - "id": "bfuzzy1/llambses-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3553837152089788 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5046977405175623 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45290625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31399601063829785 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bhuvneshsaini/bhuvneshsaini_merged_model/44e6cddd-4ecc-499f-a6b7-d8ee0640c2f9.json b/leaderboard_data/HFOpenLLMv2/bhuvneshsaini/bhuvneshsaini_merged_model/44e6cddd-4ecc-499f-a6b7-d8ee0640c2f9.json deleted file mode 100644 index 362361b1603ef0a5b6e52245f565866f33a20b35..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bhuvneshsaini/bhuvneshsaini_merged_model/44e6cddd-4ecc-499f-a6b7-d8ee0640c2f9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bhuvneshsaini_merged_model/1762652580.032705", - "retrieved_timestamp": "1762652580.032706", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bhuvneshsaini/merged_model", - "developer": "bhuvneshsaini", - "inference_platform": "unknown", - "id": "bhuvneshsaini/merged_model" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1812767900282362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3359777949071243 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34971875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14453125 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.715 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-15b/09aa04cf-9369-453f-952a-2f6c74e4707a.json b/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-15b/09aa04cf-9369-453f-952a-2f6c74e4707a.json deleted file mode 100644 index 2b9fad2c1347015b758cc95a413345795ef72187..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-15b/09aa04cf-9369-453f-952a-2f6c74e4707a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bigcode_starcoder2-15b/1762652580.032956", - "retrieved_timestamp": "1762652580.0329568", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bigcode/starcoder2-15b", - "developer": "bigcode", - "inference_platform": "unknown", - "id": "bigcode/starcoder2-15b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2780223141265177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4447957841230437 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35009375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23528922872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Starcoder2ForCausalLM", - "params_billions": 15.958 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-3b/7385c595-5b4f-4491-8e71-ece57ffffbd2.json b/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-3b/7385c595-5b4f-4491-8e71-ece57ffffbd2.json deleted file mode 100644 index 0d2c8119a5f78c1f08d602deb4a0a92e13283a92..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-3b/7385c595-5b4f-4491-8e71-ece57ffffbd2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bigcode_starcoder2-3b/1762652580.0331972", - "retrieved_timestamp": "1762652580.0331972", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bigcode/starcoder2-3b", - "developer": "bigcode", - "inference_platform": "unknown", - "id": "bigcode/starcoder2-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20370838264693236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35087141384601755 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34345833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1636469414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Starcoder2ForCausalLM", - "params_billions": 3.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-7b/53eac61a-064e-4786-bc94-962382d88f77.json b/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-7b/53eac61a-064e-4786-bc94-962382d88f77.json deleted file mode 100644 index abce2d86e976143cade702558d38febc36048dcc..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-7b/53eac61a-064e-4786-bc94-962382d88f77.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bigcode_starcoder2-7b/1762652580.0333922", - "retrieved_timestamp": "1762652580.0333922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bigcode/starcoder2-7b", - "developer": "bigcode", - "inference_platform": "unknown", - "id": "bigcode/starcoder2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22091938279321088 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36609857669123036 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16422872340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Starcoder2ForCausalLM", - "params_billions": 7.174 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-1b1/284ba4fb-cae4-46ac-a5dd-a36fb145da55.json b/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-1b1/284ba4fb-cae4-46ac-a5dd-a36fb145da55.json deleted file mode 100644 index c109d586bff04818d1a4e4e805f2fd925e2bf18f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-1b1/284ba4fb-cae4-46ac-a5dd-a36fb145da55.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bigscience_bloom-1b1/1762652580.033589", - "retrieved_timestamp": "1762652580.033589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - 
"model_info": { - "name": "bigscience/bloom-1b1", - "developer": "bigscience", - "inference_platform": "unknown", - "id": "bigscience/bloom-1b1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13733781920858879 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31072762377370394 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36999999999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1107878989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "BloomForCausalLM", - "params_billions": 1.065 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-1b7/8adb8bb9-d057-45df-827a-cd8f014b4ff6.json b/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-1b7/8adb8bb9-d057-45df-827a-cd8f014b4ff6.json deleted file mode 100644 index accb79738af8ec8a5423a5aa52ec843bd108a8ae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-1b7/8adb8bb9-d057-45df-827a-cd8f014b4ff6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bigscience_bloom-1b7/1762652580.033839", - "retrieved_timestamp": "1762652580.033839", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bigscience/bloom-1b7", - "developer": "bigscience", - "inference_platform": "unknown", - "id": "bigscience/bloom-1b7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10438968603305895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.314054919904072 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38857291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10862699468085106 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "BloomForCausalLM", - "params_billions": 1.722 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-3b/88f90805-7410-4ec1-ad19-8e8a146f1ba3.json b/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-3b/88f90805-7410-4ec1-ad19-8e8a146f1ba3.json deleted file mode 100644 index 818bbd49b9bd72c378dc892375a866de4609bc93..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-3b/88f90805-7410-4ec1-ad19-8e8a146f1ba3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bigscience_bloom-3b/1762652580.034177", - "retrieved_timestamp": "1762652580.034179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bigscience/bloom-3b", - "developer": "bigscience", - "inference_platform": "unknown", - "id": "bigscience/bloom-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1270961050013963 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062918592346337 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3980625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11328125 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "BloomForCausalLM", - "params_billions": 3.003 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-560m/82454b92-cca1-4ac8-a620-e1a8487a5b8e.json b/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-560m/82454b92-cca1-4ac8-a620-e1a8487a5b8e.json deleted file mode 100644 index 85e4c8e5f5ef88b44239fb279f482eec8be2b5c3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-560m/82454b92-cca1-4ac8-a620-e1a8487a5b8e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bigscience_bloom-560m/1762652580.034546", - "retrieved_timestamp": "1762652580.034548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bigscience/bloom-560m", - "developer": "bigscience", - "inference_platform": "unknown", - "id": "bigscience/bloom-560m" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06202431769926019 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3025950541549823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4030833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11643949468085106 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "BloomForCausalLM", - "params_billions": 0.559 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-7b1/d5fe1452-b6ee-4f1d-9eca-713b49a6a941.json b/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-7b1/d5fe1452-b6ee-4f1d-9eca-713b49a6a941.json deleted file mode 100644 index b5965fbbe8126f51387a9c592a53137655e38b45..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-7b1/d5fe1452-b6ee-4f1d-9eca-713b49a6a941.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bigscience_bloom-7b1/1762652580.0348449", - "retrieved_timestamp": "1762652580.034846", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bigscience/bloom-7b1", - "developer": "bigscience", - "inference_platform": "unknown", - "id": "bigscience/bloom-7b1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13221696210499254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3113718529627139 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34869791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11045545212765957 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "BloomForCausalLM", - "params_billions": 7.069 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bluuwhale/bluuwhale_L3-SthenoMaid-8B-V1/44dd13bc-56f0-4dd1-90d0-bb411239109a.json b/leaderboard_data/HFOpenLLMv2/bluuwhale/bluuwhale_L3-SthenoMaid-8B-V1/44dd13bc-56f0-4dd1-90d0-bb411239109a.json deleted file mode 100644 index fe03a39b7d97cee9cdf9088f25bbccdb76cb3f0a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bluuwhale/bluuwhale_L3-SthenoMaid-8B-V1/44dd13bc-56f0-4dd1-90d0-bb411239109a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bluuwhale_L3-SthenoMaid-8B-V1/1762652580.035146", - "retrieved_timestamp": "1762652580.035147", 
- "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bluuwhale/L3-SthenoMaid-8B-V1", - "developer": "bluuwhale", - "inference_platform": "unknown", - "id": "bluuwhale/L3-SthenoMaid-8B-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7344700949037443 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5218759253208048 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3656083776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bond005/bond005_meno-tiny-0.1/109acb38-3026-4573-b082-8277b9501f09.json b/leaderboard_data/HFOpenLLMv2/bond005/bond005_meno-tiny-0.1/109acb38-3026-4573-b082-8277b9501f09.json deleted file mode 100644 index 4969a9b605716060c126c1123dca01f50fcf17c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bond005/bond005_meno-tiny-0.1/109acb38-3026-4573-b082-8277b9501f09.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bond005_meno-tiny-0.1/1762652580.035417", - "retrieved_timestamp": "1762652580.035417", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bond005/meno-tiny-0.1", - "developer": "bond005", - "inference_platform": "unknown", - "id": "bond005/meno-tiny-0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45497613000172876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4262909130965971 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13897280966767372 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785904255319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/braindao/braindao_Qwen2.5-14B-Instruct/cb442f90-a0e1-4588-900c-548b994a764d.json b/leaderboard_data/HFOpenLLMv2/braindao/braindao_Qwen2.5-14B-Instruct/cb442f90-a0e1-4588-900c-548b994a764d.json deleted file mode 100644 index 83f5020377576b246cc980357df28b5c9a742153..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/braindao/braindao_Qwen2.5-14B-Instruct/cb442f90-a0e1-4588-900c-548b994a764d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_Qwen2.5-14B-Instruct/1762652580.040103", - "retrieved_timestamp": "1762652580.040104", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/Qwen2.5-14B-Instruct", - "developer": "braindao", - "inference_platform": "unknown", - "id": "braindao/Qwen2.5-14B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8142539572778007 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6403640774008682 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.414 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48894614361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/braindao/braindao_iq-code-evmind-0.5b/58f1b3d7-74a6-4ed0-b927-afaedfdda25f.json b/leaderboard_data/HFOpenLLMv2/braindao/braindao_iq-code-evmind-0.5b/58f1b3d7-74a6-4ed0-b927-afaedfdda25f.json deleted file mode 100644 index 9d8ebfdce44f3d43a65b459a5e2e003dfe188565..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/braindao/braindao_iq-code-evmind-0.5b/58f1b3d7-74a6-4ed0-b927-afaedfdda25f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/braindao_iq-code-evmind-0.5b/1762652580.0403671", - "retrieved_timestamp": "1762652580.040368", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "braindao/iq-code-evmind-0.5b", - "developer": "braindao", - "inference_platform": "unknown", - "id": "braindao/iq-code-evmind-0.5b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3215612353001148 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31637440507987097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33037500000000003 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11893284574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Bgeneral-ECE-PRYMMAL-Martial/06d2ac1d-d70c-4cda-997d-9d4d1ef50c5a.json b/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Bgeneral-ECE-PRYMMAL-Martial/06d2ac1d-d70c-4cda-997d-9d4d1ef50c5a.json deleted file mode 100644 index 3f85e2a869e1a5d42f075463520bf92c0751e520..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Bgeneral-ECE-PRYMMAL-Martial/06d2ac1d-d70c-4cda-997d-9d4d1ef50c5a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/brgx53_3Bgeneral-ECE-PRYMMAL-Martial/1762652580.040573", - "retrieved_timestamp": "1762652580.0405738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "brgx53/3Bgeneral-ECE-PRYMMAL-Martial", - "developer": "brgx53", - "inference_platform": "unknown", - "id": "brgx53/3Bgeneral-ECE-PRYMMAL-Martial" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32893057088525113 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5458008312900208 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43728125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3933676861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Bgeneralv2-ECE-PRYMMAL-Martial/c7f6603c-dcca-49b9-94bd-0a1fbf707dd9.json 
b/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Bgeneralv2-ECE-PRYMMAL-Martial/c7f6603c-dcca-49b9-94bd-0a1fbf707dd9.json deleted file mode 100644 index 46cb1318a2db83054d129e2aa85b7450ba954dc0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Bgeneralv2-ECE-PRYMMAL-Martial/c7f6603c-dcca-49b9-94bd-0a1fbf707dd9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/brgx53_3Bgeneralv2-ECE-PRYMMAL-Martial/1762652580.040823", - "retrieved_timestamp": "1762652580.0408242", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial", - "developer": "brgx53", - "inference_platform": "unknown", - "id": "brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.567708125551315 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5607195549186694 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3496978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43563541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45054853723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Blareneg-ECE-PRYMMAL-Martial/6fea29aa-174f-4e3f-be91-c79842126c2c.json b/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Blareneg-ECE-PRYMMAL-Martial/6fea29aa-174f-4e3f-be91-c79842126c2c.json deleted file mode 100644 index 5b5056a7bf7d28be40f2357f44c0943e6cdd8ff7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Blareneg-ECE-PRYMMAL-Martial/6fea29aa-174f-4e3f-be91-c79842126c2c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/brgx53_3Blareneg-ECE-PRYMMAL-Martial/1762652580.041033", - "retrieved_timestamp": "1762652580.041034", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "brgx53/3Blareneg-ECE-PRYMMAL-Martial", - "developer": "brgx53", - "inference_platform": "unknown", - "id": "brgx53/3Blareneg-ECE-PRYMMAL-Martial" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28763902002242936 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.535846215598753 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4428958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4015957446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Blarenegv2-ECE-PRYMMAL-Martial/64e92286-72ea-4318-aaea-4e0be87a0067.json b/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Blarenegv2-ECE-PRYMMAL-Martial/64e92286-72ea-4318-aaea-4e0be87a0067.json deleted file mode 100644 index eb2ce0e5d10b9f60571ecb548aa19bcd5167ddfa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Blarenegv2-ECE-PRYMMAL-Martial/64e92286-72ea-4318-aaea-4e0be87a0067.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/brgx53_3Blarenegv2-ECE-PRYMMAL-Martial/1762652580.04124", - "retrieved_timestamp": "1762652580.04124", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "brgx53/3Blarenegv2-ECE-PRYMMAL-Martial", - "developer": "brgx53", - "inference_platform": "unknown", - "id": "brgx53/3Blarenegv2-ECE-PRYMMAL-Martial" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5661843907498769 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5607195549186694 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3496978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43563541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45054853723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_Barracuda-PRYMMAL-ECE-TW3/70a11b76-f8e4-4cfb-8ab6-791c7e9ba113.json b/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_Barracuda-PRYMMAL-ECE-TW3/70a11b76-f8e4-4cfb-8ab6-791c7e9ba113.json deleted file mode 100644 index 38954e57766daae333d7a9e78656375f5746356a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_Barracuda-PRYMMAL-ECE-TW3/70a11b76-f8e4-4cfb-8ab6-791c7e9ba113.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/brgx53_Barracuda-PRYMMAL-ECE-TW3/1762652580.041505", - "retrieved_timestamp": "1762652580.041506", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "brgx53/Barracuda-PRYMMAL-ECE-TW3", - "developer": "brgx53", - "inference_platform": "unknown", - "id": "brgx53/Barracuda-PRYMMAL-ECE-TW3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16401592219754696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30024599561514337 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36085416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10929188829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_LaConfiance-PRYMMAL-ECE-TW3/f4766bd8-0130-4ed1-ae1c-8177a65d94a9.json b/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_LaConfiance-PRYMMAL-ECE-TW3/f4766bd8-0130-4ed1-ae1c-8177a65d94a9.json deleted file mode 100644 index 912079853b0d80183b6e2c51e71a44588540524f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_LaConfiance-PRYMMAL-ECE-TW3/f4766bd8-0130-4ed1-ae1c-8177a65d94a9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/brgx53_LaConfiance-PRYMMAL-ECE-TW3/1762652580.041717", - "retrieved_timestamp": "1762652580.041717", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "brgx53/LaConfiance-PRYMMAL-ECE-TW3", - "developer": "brgx53", - "inference_platform": "unknown", - "id": "brgx53/LaConfiance-PRYMMAL-ECE-TW3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1579209829917951 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29624186550380993 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38457291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11461103723404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Blabbertron-1.0/195957fa-9d4e-49ec-afd9-17125ebcf62d.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Blabbertron-1.0/195957fa-9d4e-49ec-afd9-17125ebcf62d.json deleted file mode 100644 index a84739d6a2cc34a6ff70e4a2ca85880700282dd6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Blabbertron-1.0/195957fa-9d4e-49ec-afd9-17125ebcf62d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Blabbertron-1.0/1762652580.0421708", - "retrieved_timestamp": "1762652580.042172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Blabbertron-1.0", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/Blabbertron-1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7433376773627309 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5496552006589083 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4336875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4354222074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Blabbertron-1.1/9fbe416c-de18-4f83-812c-f48071a49917.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Blabbertron-1.1/9fbe416c-de18-4f83-812c-f48071a49917.json deleted file mode 100644 index b1a0cdba925bc4798ab1e4a4bc727d1096998222..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Blabbertron-1.1/9fbe416c-de18-4f83-812c-f48071a49917.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Blabbertron-1.1/1762652580.0424142", - "retrieved_timestamp": "1762652580.0424151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Blabbertron-1.1", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/Blabbertron-1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7265267268625026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5534000697428705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44306848404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_DeepThinker-7B-Sce-v1/814129ce-9101-4d9b-9e53-9161a010743f.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_DeepThinker-7B-Sce-v1/814129ce-9101-4d9b-9e53-9161a010743f.json deleted file mode 100644 index ff9f210af59b7545820580355a336f680b5f78ae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_DeepThinker-7B-Sce-v1/814129ce-9101-4d9b-9e53-9161a010743f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_DeepThinker-7B-Sce-v1/1762652580.043317", - 
"retrieved_timestamp": "1762652580.043317", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/DeepThinker-7B-Sce-v1", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/DeepThinker-7B-Sce-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12180015691698028 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30182806791122846 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41942708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_DeepThinker-7B-Sce-v2/82cc30d2-9bb6-499f-b522-c66688e07c00.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_DeepThinker-7B-Sce-v2/82cc30d2-9bb6-499f-b522-c66688e07c00.json deleted file mode 100644 index 2b800219f67ccc821b6b7c7b87d9b838d5a02c97..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_DeepThinker-7B-Sce-v2/82cc30d2-9bb6-499f-b522-c66688e07c00.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_DeepThinker-7B-Sce-v2/1762652580.0435221", - "retrieved_timestamp": "1762652580.043523", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/DeepThinker-7B-Sce-v2", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/DeepThinker-7B-Sce-v2" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16306621985221434 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3056842322947901 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4100625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11461103723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_FuseCyberMix-Qwen-2.5-7B-Instruct/d851bc0d-5f11-40f6-982c-39809dffe946.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_FuseCyberMix-Qwen-2.5-7B-Instruct/d851bc0d-5f11-40f6-982c-39809dffe946.json deleted file mode 100644 index a4a6c1f0e5f80517e66a477a7b2c817df8ea0146..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_FuseCyberMix-Qwen-2.5-7B-Instruct/d851bc0d-5f11-40f6-982c-39809dffe946.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_FuseCyberMix-Qwen-2.5-7B-Instruct/1762652580.043724", - "retrieved_timestamp": "1762652580.043725", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7019220113742648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.5517973725429837 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43367686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_FuseQwQen-7B/06b6f8e3-f3c7-43a6-bb69-e1eb3bd10b7a.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_FuseQwQen-7B/06b6f8e3-f3c7-43a6-bb69-e1eb3bd10b7a.json deleted file mode 100644 index 703ab06846db9b1923292e14d267e898efcfad84..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_FuseQwQen-7B/06b6f8e3-f3c7-43a6-bb69-e1eb3bd10b7a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_FuseQwQen-7B/1762652580.0439281", - "retrieved_timestamp": "1762652580.043929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/FuseQwQen-7B", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/FuseQwQen-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7274509412802475 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5504256932515404 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43655589123867067 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4216875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4406582446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Maestro-S1k-7B-Sce/cc0c2de6-5a8d-4229-bd92-a1ad0b95a6b0.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Maestro-S1k-7B-Sce/cc0c2de6-5a8d-4229-bd92-a1ad0b95a6b0.json deleted file mode 100644 index ad156157d8711d24f70462075d8a4e93cc13c71a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Maestro-S1k-7B-Sce/cc0c2de6-5a8d-4229-bd92-a1ad0b95a6b0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Maestro-S1k-7B-Sce/1762652580.048955", - "retrieved_timestamp": "1762652580.048955", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Maestro-S1k-7B-Sce", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/Maestro-S1k-7B-Sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2522684255553044 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104380842714463 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11702127659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qandora-2.5-7B-Creative/acd82774-f29a-4b19-b08c-693706bb4603.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qandora-2.5-7B-Creative/acd82774-f29a-4b19-b08c-693706bb4603.json deleted file mode 100644 index 23b76e19d2f85cee961b8e3164d8e452c60a6e2b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qandora-2.5-7B-Creative/acd82774-f29a-4b19-b08c-693706bb4603.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qandora-2.5-7B-Creative/1762652580.0529459", - "retrieved_timestamp": "1762652580.052947", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qandora-2.5-7B-Creative", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/Qandora-2.5-7B-Creative" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6803148978044922 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5541763892398439 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30589123867069484 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4211875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4479720744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B-Persona/4e9dc7ca-f4f2-4c1f-b532-628a8d9d515b.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B-Persona/4e9dc7ca-f4f2-4c1f-b532-628a8d9d515b.json deleted file mode 100644 index 077d5930abba8c6cbb955a64787e7f48037c2731..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B-Persona/4e9dc7ca-f4f2-4c1f-b532-628a8d9d515b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/bunnycore_QandoraExp-7B-Persona/1762652580.0533981", - "retrieved_timestamp": "1762652580.053399", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/QandoraExp-7B-Persona", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/QandoraExp-7B-Persona" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6246858335882126 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5558337526959515 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104229607250755 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43715624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44074135638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B-v2/85bc0517-382e-4a4c-ac31-ee6de74d2c8f.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B-v2/85bc0517-382e-4a4c-ac31-ee6de74d2c8f.json deleted file mode 100644 index 1327641090433e2cd3a5cedfd0532a81876caa40..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B-v2/85bc0517-382e-4a4c-ac31-ee6de74d2c8f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_QandoraExp-7B-v2/1762652580.053621", - "retrieved_timestamp": "1762652580.053621", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/QandoraExp-7B-v2", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": 
"bunnycore/QandoraExp-7B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5606889719278182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5444864824489132 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47129909365558914 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40454166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.390874335106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B/744f9f56-fbb4-450f-9427-35e6e49ca014.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B/744f9f56-fbb4-450f-9427-35e6e49ca014.json deleted file mode 100644 index 92af2878eb87b8036419088363cd144270e19b86..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B/744f9f56-fbb4-450f-9427-35e6e49ca014.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_QandoraExp-7B/1762652580.0531762", - "retrieved_timestamp": "1762652580.0531762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/QandoraExp-7B", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/QandoraExp-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7509064836855099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5477959748047708 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4743202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43120833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4409906914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QwQen-3B-LCoT-R1/636c4294-b3d0-42fc-b437-e4a80f70b4d9.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QwQen-3B-LCoT-R1/636c4294-b3d0-42fc-b437-e4a80f70b4d9.json deleted file mode 100644 index 63fd5cf9fda5179dd212007aeee3b1e0624a68e7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QwQen-3B-LCoT-R1/636c4294-b3d0-42fc-b437-e4a80f70b4d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_QwQen-3B-LCoT-R1/1762652580.05408", - "retrieved_timestamp": "1762652580.054081", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/QwQen-3B-LCoT-R1", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/QwQen-3B-LCoT-R1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.534160471992092 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4798600168403517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33534743202416917 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723404255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.085 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QwQen-3B-LCoT/bff23021-087b-4118-ba4d-219a97a1dedc.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QwQen-3B-LCoT/bff23021-087b-4118-ba4d-219a97a1dedc.json deleted file mode 100644 index 3652835b507237f52c46c6f4a07410a41cc50537..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QwQen-3B-LCoT/bff23021-087b-4118-ba4d-219a97a1dedc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_QwQen-3B-LCoT/1762652580.05384", - "retrieved_timestamp": "1762652580.0538409", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/QwQen-3B-LCoT", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/QwQen-3B-LCoT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6025290673191577 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4899306773152123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36178247734138974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41778125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699301861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qwen2.5-7B-Instruct-Fusion/6d88de9c-062d-4858-95ef-a05f6a29b6c3.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qwen2.5-7B-Instruct-Fusion/6d88de9c-062d-4858-95ef-a05f6a29b6c3.json deleted file mode 100644 index fb0f96fbd83ba068d2dbc8041dea3f30afed443c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qwen2.5-7B-Instruct-Fusion/6d88de9c-062d-4858-95ef-a05f6a29b6c3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Instruct-Fusion/1762652580.0585442", - "retrieved_timestamp": "1762652580.0585449", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-Instruct-Fusion", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-Instruct-Fusion" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6962016338869754 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5491903018724945 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3406344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42971875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4467253989361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qwen2.5-7B-Instruct-Merge-Stock-v0.1/fe31c10e-8231-49f4-afb3-e2588396c032.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qwen2.5-7B-Instruct-Merge-Stock-v0.1/fe31c10e-8231-49f4-afb3-e2588396c032.json deleted file mode 100644 index 04533db90ed6b2d6c6257b80a6d9594af2dc414b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qwen2.5-7B-Instruct-Merge-Stock-v0.1/fe31c10e-8231-49f4-afb3-e2588396c032.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Instruct-Merge-Stock-v0.1/1762652580.0587678", - "retrieved_timestamp": "1762652580.058769", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7509064836855099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5529431709465797 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48942598187311176 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42311458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4383311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_SmolLM2-1.7-Persona/5249691a-3672-4ccd-98dd-d9b937bca750.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_SmolLM2-1.7-Persona/5249691a-3672-4ccd-98dd-d9b937bca750.json deleted file mode 100644 index f8892d26832ca51eebbbe1f26aa39c454c83f690..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_SmolLM2-1.7-Persona/5249691a-3672-4ccd-98dd-d9b937bca750.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_SmolLM2-1.7-Persona/1762652580.062155", - "retrieved_timestamp": "1762652580.062156", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/SmolLM2-1.7-Persona", - 
"developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/SmolLM2-1.7-Persona" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5465254413844156 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3623213930905173 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1973902925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_SmolLM2-1.7B-roleplay-lora/ae109e51-8631-4e09-8839-8e9ed74da4c7.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_SmolLM2-1.7B-roleplay-lora/ae109e51-8631-4e09-8839-8e9ed74da4c7.json deleted file mode 100644 index 8c4d7eec661f8a7cf3afe486ae1ed964f9189d48..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_SmolLM2-1.7B-roleplay-lora/ae109e51-8631-4e09-8839-8e9ed74da4c7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_SmolLM2-1.7B-roleplay-lora/1762652580.062429", - "retrieved_timestamp": "1762652580.06243", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/SmolLM2-1.7B-roleplay-lora", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/SmolLM2-1.7B-roleplay-lora" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5382075116247114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3610343412303005 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33945833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19664228723404256 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 3.423 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Tulu-3.1-8B-SuperNova/cd979586-e334-4964-b06c-f33c66f09c0e.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Tulu-3.1-8B-SuperNova/cd979586-e334-4964-b06c-f33c66f09c0e.json deleted file mode 100644 index 0de3813bcaf001e1a465ee5b7a5ac775f819996a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Tulu-3.1-8B-SuperNova/cd979586-e334-4964-b06c-f33c66f09c0e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Tulu-3.1-8B-SuperNova/1762652580.062763", - "retrieved_timestamp": "1762652580.0627651", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Tulu-3.1-8B-SuperNova", - "developer": "bunnycore", - "inference_platform": "unknown", - "id": "bunnycore/Tulu-3.1-8B-SuperNova" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8193748143813969 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5254122754311122 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24622356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3935 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3813996010638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Mistral-Small-Instruct-2409-abliterated/ff0c627b-72b9-45d4-a385-49c8b0ae6b6e.json b/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Mistral-Small-Instruct-2409-abliterated/ff0c627b-72b9-45d4-a385-49c8b0ae6b6e.json deleted file mode 100644 index 0b0eb1a5aec8f57cd12de75b34a91384632effdf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Mistral-Small-Instruct-2409-abliterated/ff0c627b-72b9-45d4-a385-49c8b0ae6b6e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/byroneverson_Mistral-Small-Instruct-2409-abliterated/1762652580.063036", - "retrieved_timestamp": "1762652580.063037", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "byroneverson/Mistral-Small-Instruct-2409-abliterated", - "developer": "byroneverson", - "inference_platform": "unknown", - "id": "byroneverson/Mistral-Small-Instruct-2409-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6970759806203096 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5237864400325174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24773413897280966 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36971875000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39228723404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Yi-1.5-9B-Chat-16K-abliterated/dc783bb0-c784-4cf4-888b-36a3bfa37a84.json b/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Yi-1.5-9B-Chat-16K-abliterated/dc783bb0-c784-4cf4-888b-36a3bfa37a84.json deleted file mode 100644 index 1e0526c1d43ce59e4fc33663093ca0deb8f84f38..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Yi-1.5-9B-Chat-16K-abliterated/dc783bb0-c784-4cf4-888b-36a3bfa37a84.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/byroneverson_Yi-1.5-9B-Chat-16K-abliterated/1762652580.068388", - "retrieved_timestamp": "1762652580.068392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "byroneverson/Yi-1.5-9B-Chat-16K-abliterated", - "developer": "byroneverson", - "inference_platform": "unknown", - "id": "byroneverson/Yi-1.5-9B-Chat-16K-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5528453392553979 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5282050829986801 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4734375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38231382978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Yi-1.5-9B-Chat-abliterated/345560e2-c981-4aca-9388-4f3a5e95ace8.json 
b/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Yi-1.5-9B-Chat-abliterated/345560e2-c981-4aca-9388-4f3a5e95ace8.json deleted file mode 100644 index 985fb6bf85fb7714163adb35915336090add81bb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Yi-1.5-9B-Chat-abliterated/345560e2-c981-4aca-9388-4f3a5e95ace8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/byroneverson_Yi-1.5-9B-Chat-abliterated/1762652580.070213", - "retrieved_timestamp": "1762652580.070215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "byroneverson/Yi-1.5-9B-Chat-abliterated", - "developer": "byroneverson", - "inference_platform": "unknown", - "id": "byroneverson/Yi-1.5-9B-Chat-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5723291976400395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5401219363002313 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43886458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3715093085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/c10x/c10x_Q-Pluse/2093ba5f-d2f8-45d2-bcf7-ff48810c47af.json b/leaderboard_data/HFOpenLLMv2/c10x/c10x_Q-Pluse/2093ba5f-d2f8-45d2-bcf7-ff48810c47af.json deleted file mode 100644 index 5680463d07d45110676ddb5c93086563359625f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/c10x/c10x_Q-Pluse/2093ba5f-d2f8-45d2-bcf7-ff48810c47af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/c10x_Q-Pluse/1762652580.070795", - "retrieved_timestamp": "1762652580.070796", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "c10x/Q-Pluse", - "developer": "c10x", - "inference_platform": "unknown", - "id": "c10x/Q-Pluse" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228318638988993 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2875111436321769 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39381249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11353058510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/c10x/c10x_longthinker/fe7bd3bb-71a4-46dd-a86d-b5a24b685fa5.json b/leaderboard_data/HFOpenLLMv2/c10x/c10x_longthinker/fe7bd3bb-71a4-46dd-a86d-b5a24b685fa5.json deleted file mode 100644 index 325dee7447e8815fe1fb725e9b5ef3de1cc8c2c4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/c10x/c10x_longthinker/fe7bd3bb-71a4-46dd-a86d-b5a24b685fa5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/c10x_longthinker/1762652580.078971", - "retrieved_timestamp": "1762652580.078974", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "c10x/longthinker", - "developer": "c10x", - "inference_platform": "unknown", - "id": "c10x/longthinker" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.36087913403103766 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49274888053364546 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23187311178247735 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3909583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527260638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/carsenk/carsenk_flippa-v6/a4bcc6f3-b745-48f7-a394-90cd42363aae.json b/leaderboard_data/HFOpenLLMv2/carsenk/carsenk_flippa-v6/a4bcc6f3-b745-48f7-a394-90cd42363aae.json deleted file mode 100644 index 2843e36d2b4f05061b352479dfe17e4dc640ef8c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/carsenk/carsenk_flippa-v6/a4bcc6f3-b745-48f7-a394-90cd42363aae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/carsenk_flippa-v6/1762652580.079394", - "retrieved_timestamp": "1762652580.079395", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "carsenk/flippa-v6", - "developer": "carsenk", - "inference_platform": "unknown", - "id": "carsenk/flippa-v6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3439429602344003 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5046972457053399 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1404833836858006 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40887500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3667719414893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 16.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/cgato/cgato_TheSalt-L3-8b-v0.3.2/aa805bcc-3847-40b5-86eb-397982106d18.json b/leaderboard_data/HFOpenLLMv2/cgato/cgato_TheSalt-L3-8b-v0.3.2/aa805bcc-3847-40b5-86eb-397982106d18.json deleted file mode 100644 index 4b82dee13283ae206ac1e53c3fb6f7298f8c8128..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/cgato/cgato_TheSalt-L3-8b-v0.3.2/aa805bcc-3847-40b5-86eb-397982106d18.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cgato_TheSalt-L3-8b-v0.3.2/1762652580.100134", - "retrieved_timestamp": "1762652580.100136", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cgato/TheSalt-L3-8b-v0.3.2", - "developer": "cgato", - "inference_platform": "unknown", - "id": "cgato/TheSalt-L3-8b-v0.3.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27050337548814923 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29679653176003074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38962499999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.11394614361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/chujiezheng/chujiezheng_Llama-3-Instruct-8B-SimPO-ExPO/bdf85c5c-6eaa-4df6-a393-66b71aa28952.json b/leaderboard_data/HFOpenLLMv2/chujiezheng/chujiezheng_Llama-3-Instruct-8B-SimPO-ExPO/bdf85c5c-6eaa-4df6-a393-66b71aa28952.json deleted file mode 100644 index ec1f92016b66db0c6dc4994cb119ee67bd15e83c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/chujiezheng/chujiezheng_Llama-3-Instruct-8B-SimPO-ExPO/bdf85c5c-6eaa-4df6-a393-66b71aa28952.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/chujiezheng_Llama-3-Instruct-8B-SimPO-ExPO/1762652580.1008909", - "retrieved_timestamp": "1762652580.100893", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO", - "developer": "chujiezheng", - "inference_platform": "unknown", - "id": "chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6433707008515184 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4764515968840137 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3920104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.340093085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/cjvt/cjvt_GaMS-1B/e9acbb25-2b96-4a2a-92ff-d2b68c0e49f8.json b/leaderboard_data/HFOpenLLMv2/cjvt/cjvt_GaMS-1B/e9acbb25-2b96-4a2a-92ff-d2b68c0e49f8.json deleted file mode 100644 index 507e6b26125eda2cd40974598b9d8a2f4fd72931..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/cjvt/cjvt_GaMS-1B/e9acbb25-2b96-4a2a-92ff-d2b68c0e49f8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cjvt_GaMS-1B/1762652580.101496", - "retrieved_timestamp": "1762652580.1014972", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cjvt/GaMS-1B", - "developer": "cjvt", - "inference_platform": "unknown", - "id": "cjvt/GaMS-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.163541625110263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3074752552734472 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36841666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11486037234042554 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "OPTForCausalLM", - "params_billions": 1.54 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_11Bx2_MoE_19B/9be76c82-0f70-4b76-8476-7707d4da85bb.json b/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_11Bx2_MoE_19B/9be76c82-0f70-4b76-8476-7707d4da85bb.json deleted file mode 100644 index d617801d92c79b8040c10a694e07653608bada50..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_11Bx2_MoE_19B/9be76c82-0f70-4b76-8476-7707d4da85bb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cloudyu_Mixtral_11Bx2_MoE_19B/1762652580.102268", - "retrieved_timestamp": "1762652580.102269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"cloudyu/Mixtral_11Bx2_MoE_19B", - "developer": "cloudyu", - "inference_platform": "unknown", - "id": "cloudyu/Mixtral_11Bx2_MoE_19B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850837998732253 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5208516020145867 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4296875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33111702127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 19.188 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_34Bx2_MoE_60B/fdbef33b-dffb-4146-bc83-f8b03c842b2e.json b/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_34Bx2_MoE_60B/fdbef33b-dffb-4146-bc83-f8b03c842b2e.json deleted file mode 100644 index 8e537411a1909632e9c3bfcb55feb8f143241c50..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_34Bx2_MoE_60B/fdbef33b-dffb-4146-bc83-f8b03c842b2e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cloudyu_Mixtral_34Bx2_MoE_60B/1762652580.102543", - "retrieved_timestamp": "1762652580.1025438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cloudyu/Mixtral_34Bx2_MoE_60B", - "developer": "cloudyu", - "inference_platform": "unknown", - "id": "cloudyu/Mixtral_34Bx2_MoE_60B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4537770892343427 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5869701263465353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4625208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47664561170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 60.814 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_7Bx2_MoE/b6c048f5-b01e-4e51-8a6c-c068dfd199ef.json b/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_7Bx2_MoE/b6c048f5-b01e-4e51-8a6c-c068dfd199ef.json deleted file mode 100644 index 584a9d460146bfb557e1fbf875b1199efd1895ab..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_7Bx2_MoE/b6c048f5-b01e-4e51-8a6c-c068dfd199ef.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cloudyu_Mixtral_7Bx2_MoE/1762652580.102766", - "retrieved_timestamp": "1762652580.102767", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cloudyu/Mixtral_7Bx2_MoE", - "developer": "cloudyu", - "inference_platform": "unknown", - "id": "cloudyu/Mixtral_7Bx2_MoE" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4480068440626427 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5159732691655027 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44729166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30435505319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Yi-34Bx2-MoE-60B-DPO/542d450b-8108-4abe-a2ae-5b9a577558d6.json b/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Yi-34Bx2-MoE-60B-DPO/542d450b-8108-4abe-a2ae-5b9a577558d6.json deleted file mode 100644 index 4c50c883b0be9f993bb07c5384aa76117040b245..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Yi-34Bx2-MoE-60B-DPO/542d450b-8108-4abe-a2ae-5b9a577558d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cloudyu_Yi-34Bx2-MoE-60B-DPO/1762652580.108832", - "retrieved_timestamp": "1762652580.1088362", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cloudyu/Yi-34Bx2-MoE-60B-DPO", - "developer": "cloudyu", - "inference_platform": "unknown", - "id": "cloudyu/Yi-34Bx2-MoE-60B-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.531887613753729 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.516831447641953 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46766954787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - 
"params_billions": 60.814 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_cp2024-instruct/247e1c1e-ce27-4645-a2ae-4177f08ea4a5.json b/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_cp2024-instruct/247e1c1e-ce27-4645-a2ae-4177f08ea4a5.json deleted file mode 100644 index 0d50fb2bcf0251bf2190686337c2cb983c5f25ef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_cp2024-instruct/247e1c1e-ce27-4645-a2ae-4177f08ea4a5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cpayne1303_cp2024-instruct/1762652580.116854", - "retrieved_timestamp": "1762652580.116854", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cpayne1303/cp2024-instruct", - "developer": "cpayne1303", - "inference_platform": "unknown", - "id": "cpayne1303/cp2024-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17061064641817045 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2946778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11668882978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.031 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_cp2024/2bfb7bea-a344-4249-8bdc-e6c483518df5.json b/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_cp2024/2bfb7bea-a344-4249-8bdc-e6c483518df5.json deleted file mode 100644 index 5e4f63731aaa22e6978744758314c68cf8e29a1a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_cp2024/2bfb7bea-a344-4249-8bdc-e6c483518df5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cpayne1303_cp2024/1762652580.116582", - 
"retrieved_timestamp": "1762652580.1165829", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cpayne1303/cp2024", - "developer": "cpayne1303", - "inference_platform": "unknown", - "id": "cpayne1303/cp2024" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16581448334862608 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29853854089245085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3383125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11012300531914894 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.031 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_smallcp2024/fcbede38-3a5b-4cd7-b144-cbf26cc05df9.json b/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_smallcp2024/fcbede38-3a5b-4cd7-b144-cbf26cc05df9.json deleted file mode 100644 index 295373ed1dcac98484a94e457a3708494aaa27f1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_smallcp2024/fcbede38-3a5b-4cd7-b144-cbf26cc05df9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cpayne1303_smallcp2024/1762652580.117528", - "retrieved_timestamp": "1762652580.117528", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cpayne1303/smallcp2024", - "developer": "cpayne1303", - "inference_platform": "unknown", - "id": "cpayne1303/smallcp2024" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1581958093414363 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3027047714604053 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23070469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34246874999999993 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11136968085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.002 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/crestf411/crestf411_MN-Slush/b32a7808-7a64-41a8-aad4-030efc512906.json b/leaderboard_data/HFOpenLLMv2/crestf411/crestf411_MN-Slush/b32a7808-7a64-41a8-aad4-030efc512906.json deleted file mode 100644 index 014effd43be8f804c95e7f1d032548f716682df0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/crestf411/crestf411_MN-Slush/b32a7808-7a64-41a8-aad4-030efc512906.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/crestf411_MN-Slush/1762652580.117737", - "retrieved_timestamp": "1762652580.117738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "crestf411/MN-Slush", - "developer": "crestf411", - "inference_platform": "unknown", - "id": "crestf411/MN-Slush" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077148632295642 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5340014235282594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39328125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3508144946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/cyberagent/cyberagent_calm3-22b-chat/b7ce290d-d082-4586-ac4b-516e8130ddc2.json b/leaderboard_data/HFOpenLLMv2/cyberagent/cyberagent_calm3-22b-chat/b7ce290d-d082-4586-ac4b-516e8130ddc2.json deleted file mode 100644 index ed290f1f1763b0416a5e215b1f9d730b944fdd0c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/cyberagent/cyberagent_calm3-22b-chat/b7ce290d-d082-4586-ac4b-516e8130ddc2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cyberagent_calm3-22b-chat/1762652580.118237", - "retrieved_timestamp": "1762652580.118238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cyberagent/calm3-22b-chat", - "developer": "cyberagent", - "inference_platform": "unknown", - "id": "cyberagent/calm3-22b-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.509131327100981 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4991683247746046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45532291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29496343085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 22.543 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlassNeverSleeps/675f6dfe-c623-4694-94cb-8705aab5521f.json b/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlassNeverSleeps/675f6dfe-c623-4694-94cb-8705aab5521f.json deleted file mode 100644 index 66e7450af41194e8b3d0fbb45e501edc0da300db..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlassNeverSleeps/675f6dfe-c623-4694-94cb-8705aab5521f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/darkc0de_BuddyGlassNeverSleeps/1762652580.1184928", - "retrieved_timestamp": "1762652580.118494", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "darkc0de/BuddyGlassNeverSleeps", - "developer": "darkc0de", - "inference_platform": "unknown", - "id": "darkc0de/BuddyGlassNeverSleeps" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4239019135892764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49772281653646816 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3992708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34524601063829785 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlassUncensored2025.2/ea8dfb5f-750d-4573-a2bb-dadafc3a73b7.json 
b/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlassUncensored2025.2/ea8dfb5f-750d-4573-a2bb-dadafc3a73b7.json deleted file mode 100644 index f2d90db12d565da7187b8c11b95ab773c72a90f3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlassUncensored2025.2/ea8dfb5f-750d-4573-a2bb-dadafc3a73b7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/darkc0de_BuddyGlassUncensored2025.2/1762652580.118735", - "retrieved_timestamp": "1762652580.1187358", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "darkc0de/BuddyGlassUncensored2025.2", - "developer": "darkc0de", - "inference_platform": "unknown", - "id": "darkc0de/BuddyGlassUncensored2025.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7731131176389756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6095411371819216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24018126888217523 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43359375 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/adf85459-eba0-48a8-ad54-1e17d1ea5b31.json b/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/adf85459-eba0-48a8-ad54-1e17d1ea5b31.json deleted file mode 100644 index 5149d1b9e15688ffca5b8814887afe3a3e1a0601..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/adf85459-eba0-48a8-ad54-1e17d1ea5b31.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/darkc0de_BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/1762652580.1189609", - 
"retrieved_timestamp": "1762652580.1189609", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp", - "developer": "darkc0de", - "inference_platform": "unknown", - "id": "darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43584245357872664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5243087998656722 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4143333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36727061170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.007 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dbrx-base/17febb53-0735-4983-8049-85319818ab84.json b/leaderboard_data/HFOpenLLMv2/databricks/databricks_dbrx-base/17febb53-0735-4983-8049-85319818ab84.json deleted file mode 100644 index e09c2c814fe12578c1ff937ff44858b218307db0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dbrx-base/17febb53-0735-4983-8049-85319818ab84.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/databricks_dbrx-base/1762652580.1191711", - "retrieved_timestamp": "1762652580.1191711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "databricks/dbrx-base", - "developer": "databricks", - "inference_platform": "unknown", - "id": "databricks/dbrx-base" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08214723926380368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5195833333333334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32666666666666666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4066666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Unknown", - "params_billions": 0.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dbrx-instruct/639e4921-9fa8-446d-b539-f03a7589b142.json b/leaderboard_data/HFOpenLLMv2/databricks/databricks_dbrx-instruct/639e4921-9fa8-446d-b539-f03a7589b142.json deleted file mode 100644 index 73288ef798a6d4ba2caf8278dc3f9053130cd52f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dbrx-instruct/639e4921-9fa8-446d-b539-f03a7589b142.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/databricks_dbrx-instruct/1762652580.119466", - "retrieved_timestamp": "1762652580.119467", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "databricks/dbrx-instruct", - "developer": "databricks", - "inference_platform": "unknown", - "id": "databricks/dbrx-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5415796752616391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5428960796934387 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42692708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36826795212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "DbrxForCausalLM", - "params_billions": 131.597 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v1-6b/62299ec1-dd42-4751-a224-3bdda71d3cdf.json b/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v1-6b/62299ec1-dd42-4751-a224-3bdda71d3cdf.json deleted file mode 100644 index 2d277c2d89a39fe15e77251089ed40c168fb8464..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v1-6b/62299ec1-dd42-4751-a224-3bdda71d3cdf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/databricks_dolly-v1-6b/1762652580.1196742", - "retrieved_timestamp": "1762652580.119675", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "databricks/dolly-v1-6b", - "developer": "databricks", - "inference_platform": "unknown", - "id": "databricks/dolly-v1-6b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22244311759464885 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3172089528774696 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.40041666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12657912234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPTJForCausalLM", - "params_billions": 6.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-12b/c83e2bf0-5d4e-45c4-aff2-27aea2bc0fb6.json b/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-12b/c83e2bf0-5d4e-45c4-aff2-27aea2bc0fb6.json deleted file mode 100644 index 6e9ddbcf465b8cbc27ce324123069aa3eeab0e43..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-12b/c83e2bf0-5d4e-45c4-aff2-27aea2bc0fb6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/databricks_dolly-v2-12b/1762652580.1198819", - "retrieved_timestamp": "1762652580.119883", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "databricks/dolly-v2-12b", - "developer": "databricks", - "inference_platform": "unknown", - "id": "databricks/dolly-v2-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23550734273948679 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33199731673771277 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37390625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11286569148936171 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 12.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-3b/a8838707-f188-440e-801f-e780e0dd362a.json 
b/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-3b/a8838707-f188-440e-801f-e780e0dd362a.json deleted file mode 100644 index 389c515efd0b149289d0424bdcf9df4e347ba3a4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-3b/a8838707-f188-440e-801f-e780e0dd362a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/databricks_dolly-v2-3b/1762652580.1200871", - "retrieved_timestamp": "1762652580.1200871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "databricks/dolly-v2-3b", - "developer": "databricks", - "inference_platform": "unknown", - "id": "databricks/dolly-v2-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22471597583301195 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30792785961544844 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11452792553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 3.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-7b/68f999d7-2dc2-4b3c-ab02-6140387893c0.json b/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-7b/68f999d7-2dc2-4b3c-ab02-6140387893c0.json deleted file mode 100644 index 83c7f17dfaace5736ae90d3da97bec65ad5b046c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-7b/68f999d7-2dc2-4b3c-ab02-6140387893c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/databricks_dolly-v2-7b/1762652580.120286", - "retrieved_timestamp": "1762652580.120287", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "databricks/dolly-v2-7b", - "developer": "databricks", - "inference_platform": "unknown", - "id": "databricks/dolly-v2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2009856070781083 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31730628122070326 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35530208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1149434840425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/davidkim205/davidkim205_Rhea-72b-v0.5/106de4e2-a8d3-40d3-bdbc-0b95930e9ba6.json b/leaderboard_data/HFOpenLLMv2/davidkim205/davidkim205_Rhea-72b-v0.5/106de4e2-a8d3-40d3-bdbc-0b95930e9ba6.json deleted file mode 100644 index 91c621166bae059b694e8bc17f674838102d9158..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/davidkim205/davidkim205_Rhea-72b-v0.5/106de4e2-a8d3-40d3-bdbc-0b95930e9ba6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/davidkim205_Rhea-72b-v0.5/1762652580.1208682", - "retrieved_timestamp": "1762652580.1208699", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "davidkim205/Rhea-72b-v0.5", - "developer": "davidkim205", - "inference_platform": "unknown", - "id": "davidkim205/Rhea-72b-v0.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.014538092261865185 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30783395929068597 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17371601208459214 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42413541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11660571808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 72.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/davidkim205/davidkim205_nox-solar-10.7b-v4/fcc755d0-6269-49e6-890b-4a14417601a1.json b/leaderboard_data/HFOpenLLMv2/davidkim205/davidkim205_nox-solar-10.7b-v4/fcc755d0-6269-49e6-890b-4a14417601a1.json deleted file mode 100644 index 31d77ef786c94624460d1116dfb7ca207d3dc822..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/davidkim205/davidkim205_nox-solar-10.7b-v4/fcc755d0-6269-49e6-890b-4a14417601a1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/davidkim205_nox-solar-10.7b-v4/1762652580.1212", - "retrieved_timestamp": "1762652580.1212008", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "davidkim205/nox-solar-10.7b-v4", - "developer": "davidkim205", - "inference_platform": "unknown", - "id": "davidkim205/nox-solar-10.7b-v4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753418706809044 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4814038018918371 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42984375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3332779255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-67b-chat/eeea1c5c-bf81-4533-aace-ccb85153320f.json b/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-67b-chat/eeea1c5c-bf81-4533-aace-ccb85153320f.json deleted file mode 100644 index 056c4fa8610d864e25b36d0db953881ebd01649f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-67b-chat/eeea1c5c-bf81-4533-aace-ccb85153320f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-llm-67b-chat/1762652580.1230679", - "retrieved_timestamp": "1762652580.1230688", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "deepseek-ai/deepseek-llm-67b-chat", - "developer": "deepseek-ai", - "inference_platform": "unknown", - "id": "deepseek-ai/deepseek-llm-67b-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5587153197959193 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5243416179742358 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5058645833333334 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3943650265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 67.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-7b-base/e11d46c2-c121-4c74-94ae-e6ec9a5898af.json b/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-7b-base/e11d46c2-c121-4c74-94ae-e6ec9a5898af.json deleted file mode 100644 index 47dcc8706007115b9f8025b27bb373f546a9304c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-7b-base/e11d46c2-c121-4c74-94ae-e6ec9a5898af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-llm-7b-base/1762652580.1234062", - "retrieved_timestamp": "1762652580.1234071", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "deepseek-ai/deepseek-llm-7b-base", - "developer": "deepseek-ai", - "inference_platform": "unknown", - "id": "deepseek-ai/deepseek-llm-7b-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.217871913190335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35030315829299524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37378124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18060172872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-7b-chat/b9dd96f5-6ab0-4df4-9ee2-bd34c4c9fb05.json 
b/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-7b-chat/b9dd96f5-6ab0-4df4-9ee2-bd34c4c9fb05.json deleted file mode 100644 index aade62d711ddb6c37b8621a550081f298ea8af08..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-7b-chat/b9dd96f5-6ab0-4df4-9ee2-bd34c4c9fb05.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-llm-7b-chat/1762652580.123629", - "retrieved_timestamp": "1762652580.12363", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "deepseek-ai/deepseek-llm-7b-chat", - "developer": "deepseek-ai", - "inference_platform": "unknown", - "id": "deepseek-ai/deepseek-llm-7b-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4170822307034225 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3632079760108669 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46677083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21334773936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-moe-16b-base/32767af1-f01b-42ca-a8e2-6fecc5af4bfc.json b/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-moe-16b-base/32767af1-f01b-42ca-a8e2-6fecc5af4bfc.json deleted file mode 100644 index 01dd2b9ecfac46392a8c10a42e6e87a078e4f1b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-moe-16b-base/32767af1-f01b-42ca-a8e2-6fecc5af4bfc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-moe-16b-base/1762652580.123848", - "retrieved_timestamp": "1762652580.123849", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "deepseek-ai/deepseek-moe-16b-base", - "developer": "deepseek-ai", - "inference_platform": "unknown", - "id": "deepseek-ai/deepseek-moe-16b-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2449744455821664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3409461055246395 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36578125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1505152925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "DeepseekForCausalLM", - "params_billions": 16.376 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-moe-16b-chat/81c514f2-5a06-4d50-8c00-dc8b97529f46.json b/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-moe-16b-chat/81c514f2-5a06-4d50-8c00-dc8b97529f46.json deleted file mode 100644 index ba3087ba1181987d93b3800330afc2543d234dac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-moe-16b-chat/81c514f2-5a06-4d50-8c00-dc8b97529f46.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-moe-16b-chat/1762652580.1240609", - "retrieved_timestamp": "1762652580.124062", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "deepseek-ai/deepseek-moe-16b-chat", - "developer": "deepseek-ai", - "inference_platform": "unknown", - "id": "deepseek-ai/deepseek-moe-16b-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36629919724109805 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3274953026448241 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22483221476510068 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38076041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1963929521276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "DeepseekForCausalLM", - "params_billions": 16.376 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dfurman/dfurman_CalmeRys-78B-Orpo-v0.1/31d8cf18-7b35-438e-8dc6-cdba0f593348.json b/leaderboard_data/HFOpenLLMv2/dfurman/dfurman_CalmeRys-78B-Orpo-v0.1/31d8cf18-7b35-438e-8dc6-cdba0f593348.json deleted file mode 100644 index 25f02e5af468c6aad25a9be8bcc543c3dc84d37f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dfurman/dfurman_CalmeRys-78B-Orpo-v0.1/31d8cf18-7b35-438e-8dc6-cdba0f593348.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dfurman_CalmeRys-78B-Orpo-v0.1/1762652580.124436", - "retrieved_timestamp": "1762652580.124437", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dfurman/CalmeRys-78B-Orpo-v0.1", - "developer": "dfurman", - "inference_platform": "unknown", - "id": "dfurman/CalmeRys-78B-Orpo-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8163273447785211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7262282792249927 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH 
Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4001677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5901770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7012134308510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dicta-il/dicta-il_dictalm2.0-instruct/4fc01471-7a04-4f46-a973-42f5a3fd67be.json b/leaderboard_data/HFOpenLLMv2/dicta-il/dicta-il_dictalm2.0-instruct/4fc01471-7a04-4f46-a973-42f5a3fd67be.json deleted file mode 100644 index 292a1e9e36cda4e042ba96162c77f23f5ef5d27e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dicta-il/dicta-il_dictalm2.0-instruct/4fc01471-7a04-4f46-a973-42f5a3fd67be.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dicta-il_dictalm2.0-instruct/1762652580.126274", - "retrieved_timestamp": "1762652580.126276", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dicta-il/dictalm2.0-instruct", - "developer": "dicta-il", - "inference_platform": "unknown", - "id": "dicta-il/dictalm2.0-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44121264910437635 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42560784985912875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.39458333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2604720744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.251 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dicta-il/dicta-il_dictalm2.0/613c1922-270a-4e8b-ae9d-20fa25573258.json b/leaderboard_data/HFOpenLLMv2/dicta-il/dicta-il_dictalm2.0/613c1922-270a-4e8b-ae9d-20fa25573258.json deleted file mode 100644 index 56fed06a5aa3108335f8dba330a1f382fc01a2f2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dicta-il/dicta-il_dictalm2.0/613c1922-270a-4e8b-ae9d-20fa25573258.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dicta-il_dictalm2.0/1762652580.125907", - "retrieved_timestamp": "1762652580.125909", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dicta-il/dictalm2.0", - "developer": "dicta-il", - "inference_platform": "unknown", - "id": "dicta-il/dictalm2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24132745559559746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4017869112495909 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38196874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2604720744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.251 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/divyanshukunwar/divyanshukunwar_SASTRI_1_9B/f0ccf0c5-269f-46e1-a13e-b54f2903779b.json 
b/leaderboard_data/HFOpenLLMv2/divyanshukunwar/divyanshukunwar_SASTRI_1_9B/f0ccf0c5-269f-46e1-a13e-b54f2903779b.json deleted file mode 100644 index 443b8a915bd2d62bb14d0ab7f336e9c14b09c315..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/divyanshukunwar/divyanshukunwar_SASTRI_1_9B/f0ccf0c5-269f-46e1-a13e-b54f2903779b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/divyanshukunwar_SASTRI_1_9B/1762652580.1269271", - "retrieved_timestamp": "1762652580.1269279", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "divyanshukunwar/SASTRI_1_9B", - "developer": "divyanshukunwar", - "inference_platform": "unknown", - "id": "divyanshukunwar/SASTRI_1_9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4207292206899914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4680499051118341 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187333776595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 5.211 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna-test-lab/djuna-test-lab_TEST-L3.2-ReWish-3B-ties-w-base/f64d7325-38eb-4cd4-80b3-bd63d4acb72f.json b/leaderboard_data/HFOpenLLMv2/djuna-test-lab/djuna-test-lab_TEST-L3.2-ReWish-3B-ties-w-base/f64d7325-38eb-4cd4-80b3-bd63d4acb72f.json deleted file mode 100644 index e9a4c922f5e5e894239200083c8e052a8101a59e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna-test-lab/djuna-test-lab_TEST-L3.2-ReWish-3B-ties-w-base/f64d7325-38eb-4cd4-80b3-bd63d4acb72f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna-test-lab_TEST-L3.2-ReWish-3B-ties-w-base/1762652580.131253", - 
"retrieved_timestamp": "1762652580.131254", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base", - "developer": "djuna-test-lab", - "inference_platform": "unknown", - "id": "djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.635252241829457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.449540552927623 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31258311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna-test-lab/djuna-test-lab_TEST-L3.2-ReWish-3B/6d57a63e-0fa7-442b-9156-5a8985e04762.json b/leaderboard_data/HFOpenLLMv2/djuna-test-lab/djuna-test-lab_TEST-L3.2-ReWish-3B/6d57a63e-0fa7-442b-9156-5a8985e04762.json deleted file mode 100644 index 1cea8806eb208f1fedb53210dfe765fb486523a9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna-test-lab/djuna-test-lab_TEST-L3.2-ReWish-3B/6d57a63e-0fa7-442b-9156-5a8985e04762.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna-test-lab_TEST-L3.2-ReWish-3B/1762652580.131", - "retrieved_timestamp": "1762652580.131001", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna-test-lab/TEST-L3.2-ReWish-3B", - "developer": "djuna-test-lab", - "inference_platform": "unknown", - "id": 
"djuna-test-lab/TEST-L3.2-ReWish-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6367759766308949 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.449540552927623 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31258311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_G2-BigGSHT-27B-2/69cc67cc-52f9-464a-ab04-b00bb3d8c459.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_G2-BigGSHT-27B-2/69cc67cc-52f9-464a-ab04-b00bb3d8c459.json deleted file mode 100644 index 03d916782d7f3bb86cbd55bb5e77e9eac64992f6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_G2-BigGSHT-27B-2/69cc67cc-52f9-464a-ab04-b00bb3d8c459.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna_G2-BigGSHT-27B-2/1762652580.1272058", - "retrieved_timestamp": "1762652580.1272068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna/G2-BigGSHT-27B-2", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/G2-BigGSHT-27B-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7974430067775724 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.641474454273013 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2348942598187311 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40720833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45279255319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_G2-GSHT/b012b4a9-52d9-4b75-b80d-819579572f05.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_G2-GSHT/b012b4a9-52d9-4b75-b80d-819579572f05.json deleted file mode 100644 index 166684ee1b01ceb21a18fe08f7002a253f84353d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_G2-GSHT/b012b4a9-52d9-4b75-b80d-819579572f05.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna_G2-GSHT/1762652580.127527", - "retrieved_timestamp": "1762652580.127528", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna/G2-GSHT", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/G2-GSHT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5630116978505919 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5269730491270207 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.40057291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070146276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-ForStHS/2d9e083d-2c5e-4f42-ab27-6f0c150ee4db.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-ForStHS/2d9e083d-2c5e-4f42-ab27-6f0c150ee4db.json deleted file mode 100644 index cd6dead2f9a89ffb5989b1398b0aef44e3848e3b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-ForStHS/2d9e083d-2c5e-4f42-ab27-6f0c150ee4db.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna_L3.1-ForStHS/1762652580.128124", - "retrieved_timestamp": "1762652580.128125", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna/L3.1-ForStHS", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/L3.1-ForStHS" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7813313120298586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5202703381267152 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15030211480362538 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40264583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37350398936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Promissum_Mane-8B-Della-1.5-calc/f738c507-0826-4d7a-a999-8a01274d8697.json 
b/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Promissum_Mane-8B-Della-1.5-calc/f738c507-0826-4d7a-a999-8a01274d8697.json deleted file mode 100644 index bc7e36f8fd5d63c782a8a38080f40b68a31968c4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Promissum_Mane-8B-Della-1.5-calc/f738c507-0826-4d7a-a999-8a01274d8697.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna_L3.1-Promissum_Mane-8B-Della-1.5-calc/1762652580.1283488", - "retrieved_timestamp": "1762652580.12835", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7235291249440374 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5432920704935255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42528125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.390375664893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Promissum_Mane-8B-Della-calc/54d2c316-3c41-4d13-879d-a23c071a6885.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Promissum_Mane-8B-Della-calc/54d2c316-3c41-4d13-879d-a23c071a6885.json deleted file mode 100644 index 52c4e55aa6920709deff5ed378a664963c3bda9d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Promissum_Mane-8B-Della-calc/54d2c316-3c41-4d13-879d-a23c071a6885.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna_L3.1-Promissum_Mane-8B-Della-calc/1762652580.128573", - "retrieved_timestamp": 
"1762652580.128574", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna/L3.1-Promissum_Mane-8B-Della-calc", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/L3.1-Promissum_Mane-8B-Della-calc" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.544152847777231 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.548587625935678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18429003021148035 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4229895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3801529255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Purosani-2-8B/f1cc7f8d-72da-40ef-8cb1-f069cd0c052e.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Purosani-2-8B/f1cc7f8d-72da-40ef-8cb1-f069cd0c052e.json deleted file mode 100644 index ca8de8972bde41f6227dde6b4c490809a48570c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Purosani-2-8B/f1cc7f8d-72da-40ef-8cb1-f069cd0c052e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna_L3.1-Purosani-2-8B/1762652580.128782", - "retrieved_timestamp": "1762652580.128783", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna/L3.1-Purosani-2-8B", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/L3.1-Purosani-2-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4988153654525548 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5182122256069372 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38162499999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3751662234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Suze-Vume-calc/3a48a9ec-61a5-45fd-903a-de2ef90ef13e.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Suze-Vume-calc/3a48a9ec-61a5-45fd-903a-de2ef90ef13e.json deleted file mode 100644 index ed3eadb3c6476a33c6d07cbf927aaf2f353c1c35..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Suze-Vume-calc/3a48a9ec-61a5-45fd-903a-de2ef90ef13e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna_L3.1-Suze-Vume-calc/1762652580.128992", - "retrieved_timestamp": "1762652580.128992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna/L3.1-Suze-Vume-calc", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/L3.1-Suze-Vume-calc" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7296739318341999 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.516421105092519 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38429166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35147938829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-2/7b384a2a-50c5-4c04-a9dd-5a9acefbd81f.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-2/7b384a2a-50c5-4c04-a9dd-5a9acefbd81f.json deleted file mode 100644 index 15cc006f70a60e4f2d9eca7e62863b8a267151e2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-2/7b384a2a-50c5-4c04-a9dd-5a9acefbd81f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun-12B-2/1762652580.129499", - "retrieved_timestamp": "1762652580.1295", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna/MN-Chinofun-12B-2", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/MN-Chinofun-12B-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6170671595810228 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5036959998266032 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42683333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", 
- "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615359042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-3/32a4d80a-9d28-47f4-b68f-36e95a400bf2.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-3/32a4d80a-9d28-47f4-b68f-36e95a400bf2.json deleted file mode 100644 index 8c1b940741195f0225dc52cf063e778bdfd7ad6c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-3/32a4d80a-9d28-47f4-b68f-36e95a400bf2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun-12B-3/1762652580.129836", - "retrieved_timestamp": "1762652580.129837", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna/MN-Chinofun-12B-3", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/MN-Chinofun-12B-3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3052744495715812 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.53478574603334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10045317220543806 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3026097074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-4/4f09e60c-e68a-426c-ac7e-f5e6755e14be.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-4/4f09e60c-e68a-426c-ac7e-f5e6755e14be.json deleted file mode 100644 index 
a90ff54cc784823af88c862931f9bc04a48d4ea5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-4/4f09e60c-e68a-426c-ac7e-f5e6755e14be.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun-12B-4/1762652580.13009", - "retrieved_timestamp": "1762652580.130091", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna/MN-Chinofun-12B-4", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/MN-Chinofun-12B-4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5404305021786637 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5347693369790583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4306770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3497340425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun/023756a1-66cc-423a-803b-0d8b0f368bd2.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun/023756a1-66cc-423a-803b-0d8b0f368bd2.json deleted file mode 100644 index c2b824b9b526698c61ddc10ec42dd89f0878df04..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun/023756a1-66cc-423a-803b-0d8b0f368bd2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun/1762652580.1291971", - "retrieved_timestamp": "1762652580.1291971", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", 
- "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna/MN-Chinofun", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/MN-Chinofun" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6110220880596817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49527033812671534 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40835416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36028922872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Partron-7B/b045b20a-cdbf-4495-89ae-b235ada2e9e0.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Partron-7B/b045b20a-cdbf-4495-89ae-b235ada2e9e0.json deleted file mode 100644 index bf2114f9a6266688be8fd42e2869869f08ab84f2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Partron-7B/b045b20a-cdbf-4495-89ae-b235ada2e9e0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna_Q2.5-Partron-7B/1762652580.130363", - "retrieved_timestamp": "1762652580.130364", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna/Q2.5-Partron-7B", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/Q2.5-Partron-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7321218810533828 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5418474850726388 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4826283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41654166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282746010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Veltha-14B-0.5/258520cb-360a-4629-be8e-e4ffca8a81b2.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Veltha-14B-0.5/258520cb-360a-4629-be8e-e4ffca8a81b2.json deleted file mode 100644 index f99e4a9ce14ef4b644779e91aa682f23e283237b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Veltha-14B-0.5/258520cb-360a-4629-be8e-e4ffca8a81b2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna_Q2.5-Veltha-14B-0.5/1762652580.13079", - "retrieved_timestamp": "1762652580.130791", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna/Q2.5-Veltha-14B-0.5", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/Q2.5-Veltha-14B-0.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7795826185631901 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6523026688308357 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43731117824773413 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43390625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5295046542553191 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Veltha-14B/0a9560cd-d3e2-4d41-b83c-f321bcfc9c3c.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Veltha-14B/0a9560cd-d3e2-4d41-b83c-f321bcfc9c3c.json deleted file mode 100644 index 1b7da584c5836128234a01d30b0ed44493a3f73e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Veltha-14B/0a9560cd-d3e2-4d41-b83c-f321bcfc9c3c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna_Q2.5-Veltha-14B/1762652580.130576", - "retrieved_timestamp": "1762652580.1305768", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna/Q2.5-Veltha-14B", - "developer": "djuna", - "inference_platform": "unknown", - "id": "djuna/Q2.5-Veltha-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8291666112581284 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.648421390292023 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4788519637462236 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41942708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5298371010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-8B-Instruct/85472ae2-d5f0-4896-811b-d4217241bcef.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-8B-Instruct/85472ae2-d5f0-4896-811b-d4217241bcef.json deleted file mode 100644 index 799da164efbd3cf978b52263f3cd58f8fff3f69a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-8B-Instruct/85472ae2-d5f0-4896-811b-d4217241bcef.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3-8B-Instruct/1762652580.131744", - "retrieved_timestamp": "1762652580.131744", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dnhkng/RYS-Llama-3-8B-Instruct", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-Llama-3-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6957772044841022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4808708123069005 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33834375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.355718085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-Huge-Instruct/0e8dfce1-b0d3-4ba5-a3be-ba6f52421841.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-Huge-Instruct/0e8dfce1-b0d3-4ba5-a3be-ba6f52421841.json deleted file mode 100644 index 431b46ae7241d28f69f44cdf54b3ed251108222b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-Huge-Instruct/0e8dfce1-b0d3-4ba5-a3be-ba6f52421841.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3-Huge-Instruct/1762652580.1319628", - 
"retrieved_timestamp": "1762652580.131964", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dnhkng/RYS-Llama-3-Huge-Instruct", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-Llama-3-Huge-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7685917809190725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6480872171360044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22885196374622357 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4207604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.510970744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 99.646 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-Large-Instruct/f9485436-6935-422f-9eb1-ee7faeb231d1.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-Large-Instruct/f9485436-6935-422f-9eb1-ee7faeb231d1.json deleted file mode 100644 index 086883e9115a3a11d7748c8f2e93a9cc1b1089fc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-Large-Instruct/f9485436-6935-422f-9eb1-ee7faeb231d1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3-Large-Instruct/1762652580.132239", - "retrieved_timestamp": "1762652580.132241", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dnhkng/RYS-Llama-3-Large-Instruct", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-Llama-3-Large-Instruct" - }, - "evaluation_results": [ - { 
- "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8050616807847621 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.65252690724939 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41803125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5137134308510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 73.976 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3.1-8B-Instruct/62dab9bd-df83-4a0b-be94-0ddd981da6e4.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3.1-8B-Instruct/62dab9bd-df83-4a0b-be94-0ddd981da6e4.json deleted file mode 100644 index 8031673a6f3b0cc049111b1d8bd05a3c4e8f589e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3.1-8B-Instruct/62dab9bd-df83-4a0b-be94-0ddd981da6e4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3.1-8B-Instruct/1762652580.132753", - "retrieved_timestamp": "1762652580.1327538", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dnhkng/RYS-Llama-3.1-8B-Instruct", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-Llama-3.1-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7684920455502511 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5163645317446665 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36394614361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 8.685 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Medium/ca1e127b-ded1-4015-85b9-be134c26644d.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Medium/ca1e127b-ded1-4015-85b9-be134c26644d.json deleted file mode 100644 index c65070ff874a4e0e3b4dd8ab6cb10923f62d4ade..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Medium/ca1e127b-ded1-4015-85b9-be134c26644d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Medium/1762652580.131469", - "retrieved_timestamp": "1762652580.13147", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dnhkng/RYS-Medium", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-Medium" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4406131287206833 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6284726872432828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.40692708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325964095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 18.731 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Phi-3-medium-4k-instruct/94f92919-36fb-4aed-8c0c-2bee0cd1d301.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Phi-3-medium-4k-instruct/94f92919-36fb-4aed-8c0c-2bee0cd1d301.json deleted file mode 100644 index 3f8b02d4c4d01c350319de2b1234e9f1bd576dcc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Phi-3-medium-4k-instruct/94f92919-36fb-4aed-8c0c-2bee0cd1d301.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Phi-3-medium-4k-instruct/1762652580.133586", - "retrieved_timestamp": "1762652580.133587", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dnhkng/RYS-Phi-3-medium-4k-instruct", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-Phi-3-medium-4k-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4391392616036561 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6226313539198264 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1608761329305136 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42528125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.484624335106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 17.709 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge-base/1b0bb4ca-9553-4ddd-bf35-cab66685668d.json 
b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge-base/1b0bb4ca-9553-4ddd-bf35-cab66685668d.json deleted file mode 100644 index 133d4499adb35475aafe598cb86aabebaf6bb721..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge-base/1b0bb4ca-9553-4ddd-bf35-cab66685668d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-XLarge-base/1762652580.134071", - "retrieved_timestamp": "1762652580.134072", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dnhkng/RYS-XLarge-base", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-XLarge-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7910233735377686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7047291858548728 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4902708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5430518617021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.972 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge/a2a90b7e-f6db-408a-b5df-284d0b4a6353.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge/a2a90b7e-f6db-408a-b5df-284d0b4a6353.json deleted file mode 100644 index a390a50e0de3a43febab80a65d9ef31ff99081f0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge/a2a90b7e-f6db-408a-b5df-284d0b4a6353.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-XLarge/1762652580.1338398", - "retrieved_timestamp": "1762652580.1338408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dnhkng/RYS-XLarge", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-XLarge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7995662619627034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7050033079850099 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49696875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5428025265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge2/6f344c50-fdf3-477e-9a76-558ed61fd509.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge2/6f344c50-fdf3-477e-9a76-558ed61fd509.json deleted file mode 100644 index 9b7340a3ad032df7d60072cd2696a89b00a4c7b0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge2/6f344c50-fdf3-477e-9a76-558ed61fd509.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-XLarge2/1762652580.1343", - "retrieved_timestamp": "1762652580.134301", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dnhkng/RYS-XLarge2", - "developer": "dnhkng", - "inference_platform": "unknown", - "id": "dnhkng/RYS-XLarge2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49019712141562166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { 
- "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6573947106260754 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27492447129909364 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4508020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5378158244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 77.965 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dreamgen/dreamgen_WizardLM-2-7B/5ed2650d-d76f-49d6-915b-ac551129913e.json b/leaderboard_data/HFOpenLLMv2/dreamgen/dreamgen_WizardLM-2-7B/5ed2650d-d76f-49d6-915b-ac551129913e.json deleted file mode 100644 index bc1329462ba3a7d85316baca05d18c6266da54bf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dreamgen/dreamgen_WizardLM-2-7B/5ed2650d-d76f-49d6-915b-ac551129913e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dreamgen_WizardLM-2-7B/1762652580.1345458", - "retrieved_timestamp": "1762652580.134547", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dreamgen/WizardLM-2-7B", - "developer": "dreamgen", - "inference_platform": "unknown", - "id": "dreamgen/WizardLM-2-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45829842595424586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34867856163972016 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39409374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2660405585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v1/c402fb6f-6e91-4e33-b847-87371373a6eb.json b/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v1/c402fb6f-6e91-4e33-b847-87371373a6eb.json deleted file mode 100644 index 1d0c653f5d8aa79e064ba4bc470ca03b0e1fb10c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v1/c402fb6f-6e91-4e33-b847-87371373a6eb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v1/1762652580.134872", - "retrieved_timestamp": "1762652580.134874", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dustinwloring1988/Reflexis-8b-chat-v1", - "developer": "dustinwloring1988", - "inference_platform": "unknown", - "id": "dustinwloring1988/Reflexis-8b-chat-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3657750324694034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4663596290293861 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3384308510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v2/6475a1f1-0c12-4ab3-89fc-cc5aa1d8145e.json b/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v2/6475a1f1-0c12-4ab3-89fc-cc5aa1d8145e.json deleted file mode 100644 index 3e3a84976812c92ad1b36fbf4a34797707249b0b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v2/6475a1f1-0c12-4ab3-89fc-cc5aa1d8145e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v2/1762652580.135156", - "retrieved_timestamp": "1762652580.135157", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dustinwloring1988/Reflexis-8b-chat-v2", - "developer": "dustinwloring1988", - "inference_platform": "unknown", - "id": "dustinwloring1988/Reflexis-8b-chat-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3912042270065648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47238018945807153 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3526354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3377659574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v3/5767ea0d-318c-4c65-9c96-890d27973302.json 
b/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v3/5767ea0d-318c-4c65-9c96-890d27973302.json deleted file mode 100644 index 2cd71cf082e3557de357d5ab744551e8cef39fb1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v3/5767ea0d-318c-4c65-9c96-890d27973302.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v3/1762652580.1353788", - "retrieved_timestamp": "1762652580.1353788", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dustinwloring1988/Reflexis-8b-chat-v3", - "developer": "dustinwloring1988", - "inference_platform": "unknown", - "id": "dustinwloring1988/Reflexis-8b-chat-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.536733644507684 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4658310598309874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35117708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35480385638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v4/ad9e0902-3542-4994-ae42-4f3ef9f88ab1.json b/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v4/ad9e0902-3542-4994-ae42-4f3ef9f88ab1.json deleted file mode 100644 index c07ced2a0369fafacae6840c46238267d9678fbc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v4/ad9e0902-3542-4994-ae42-4f3ef9f88ab1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v4/1762652580.135605", - "retrieved_timestamp": "1762652580.135605", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dustinwloring1988/Reflexis-8b-chat-v4", - "developer": "dustinwloring1988", - "inference_platform": "unknown", - "id": "dustinwloring1988/Reflexis-8b-chat-v4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4697890486132351 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46860140660011185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23406040268456377 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33930208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3390126329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v5/01c33f76-994a-4a1c-951d-88b34e471498.json b/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v5/01c33f76-994a-4a1c-951d-88b34e471498.json deleted file mode 100644 index bbda0bc1b8dc1765b7fdb1faad34218d7e838433..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v5/01c33f76-994a-4a1c-951d-88b34e471498.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v5/1762652580.135817", - "retrieved_timestamp": "1762652580.135818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"dustinwloring1988/Reflexis-8b-chat-v5", - "developer": "dustinwloring1988", - "inference_platform": "unknown", - "id": "dustinwloring1988/Reflexis-8b-chat-v5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42375231053604434 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4781685533183147 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33536458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3217253989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v6/65ce9e6f-cab9-4ccc-af89-de9be928529e.json b/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v6/65ce9e6f-cab9-4ccc-af89-de9be928529e.json deleted file mode 100644 index 6d5af7c44195c96276e66868d6d9a10da4f8208d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v6/65ce9e6f-cab9-4ccc-af89-de9be928529e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v6/1762652580.136029", - "retrieved_timestamp": "1762652580.13603", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dustinwloring1988/Reflexis-8b-chat-v6", - "developer": "dustinwloring1988", - "inference_platform": "unknown", - "id": "dustinwloring1988/Reflexis-8b-chat-v6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4938939790866014 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4809537068664902 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.347905585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v7/abadd81a-bd45-4eba-ae77-25190c751085.json b/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v7/abadd81a-bd45-4eba-ae77-25190c751085.json deleted file mode 100644 index d5b8689e32dc90dd790c9c37e57ac31919dabdbe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v7/abadd81a-bd45-4eba-ae77-25190c751085.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v7/1762652580.1362429", - "retrieved_timestamp": "1762652580.136244", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dustinwloring1988/Reflexis-8b-chat-v7", - "developer": "dustinwloring1988", - "inference_platform": "unknown", - "id": "dustinwloring1988/Reflexis-8b-chat-v7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39804828964924177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4809830787114964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16314199395770393 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3642785904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gemma-2-2b-id-instruct/73418e8c-ce10-4ea4-97f6-6f87c2be05a2.json b/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gemma-2-2b-id-instruct/73418e8c-ce10-4ea4-97f6-6f87c2be05a2.json deleted file mode 100644 index 84a242d49f9116205299de9d51f3667d56b1464f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gemma-2-2b-id-instruct/73418e8c-ce10-4ea4-97f6-6f87c2be05a2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dwikitheduck_gemma-2-2b-id-instruct/1762652580.137409", - "retrieved_timestamp": "1762652580.1374102", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dwikitheduck/gemma-2-2b-id-instruct", - "developer": "dwikitheduck", - "inference_platform": "unknown", - "id": "dwikitheduck/gemma-2-2b-id-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38785644312646006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39621721241423097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41542708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21733710106382978 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-inst-1/5117b75d-3060-4434-a40d-01c471563685.json b/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-inst-1/5117b75d-3060-4434-a40d-01c471563685.json deleted file mode 100644 index 571bbba1d5f90ad9c2bdf564386385ceef2e0348..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-inst-1/5117b75d-3060-4434-a40d-01c471563685.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dwikitheduck_gen-inst-1/1762652580.1376698", - "retrieved_timestamp": "1762652580.137671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dwikitheduck/gen-inst-1", - "developer": "dwikitheduck", - "inference_platform": "unknown", - "id": "dwikitheduck/gen-inst-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7750114141588762 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6419926671215591 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4554380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42054166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088929521276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-try1-notemp/5bd29754-7f93-42fb-ba9b-7b3a4315bd17.json b/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-try1-notemp/5bd29754-7f93-42fb-ba9b-7b3a4315bd17.json deleted file mode 
100644 index 0e1da429d3a09ce1c34d4e72bc6e8a265913d4bc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-try1-notemp/5bd29754-7f93-42fb-ba9b-7b3a4315bd17.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dwikitheduck_gen-try1-notemp/1762652580.13809", - "retrieved_timestamp": "1762652580.138091", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dwikitheduck/gen-try1-notemp", - "developer": "dwikitheduck", - "inference_platform": "unknown", - "id": "dwikitheduck/gen-try1-notemp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26270961050013963 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.626267088306491 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31797583081570996 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47141666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5210272606382979 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-try1/8f00112d-767f-4ac5-ae1c-e37781cf7eec.json b/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-try1/8f00112d-767f-4ac5-ae1c-e37781cf7eec.json deleted file mode 100644 index e7d8115ffb943649601d2233f6a6fb3afd3c50d6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-try1/8f00112d-767f-4ac5-ae1c-e37781cf7eec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dwikitheduck_gen-try1/1762652580.137886", - "retrieved_timestamp": "1762652580.137887", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dwikitheduck/gen-try1", - "developer": "dwikitheduck", - "inference_platform": "unknown", - "id": "dwikitheduck/gen-try1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7522052598217175 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6358510933470735 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4415625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110538563829787 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/dzakwan/dzakwan_dzakwan-MoE-4x7b-Beta/f4ceacae-0b81-44ac-8b9d-31d81e145bab.json b/leaderboard_data/HFOpenLLMv2/dzakwan/dzakwan_dzakwan-MoE-4x7b-Beta/f4ceacae-0b81-44ac-8b9d-31d81e145bab.json deleted file mode 100644 index e3f7597bab3ec508ffd751ce26dec40b40800044..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/dzakwan/dzakwan_dzakwan-MoE-4x7b-Beta/f4ceacae-0b81-44ac-8b9d-31d81e145bab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dzakwan_dzakwan-MoE-4x7b-Beta/1762652580.138297", - "retrieved_timestamp": "1762652580.138298", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dzakwan/dzakwan-MoE-4x7b-Beta", - "developer": "dzakwan", - "inference_platform": "unknown", - "id": "dzakwan/dzakwan-MoE-4x7b-Beta" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44426011870725235 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.514044131159397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42673958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3107546542553192 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_Falcon3-8B-Franken-Basestruct/1653400c-137e-4745-8676-eeaf39bbcc13.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_Falcon3-8B-Franken-Basestruct/1653400c-137e-4745-8676-eeaf39bbcc13.json deleted file mode 100644 index afd65e0fbc73fa8fa3f01ce0234aafb7a557a858..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_Falcon3-8B-Franken-Basestruct/1653400c-137e-4745-8676-eeaf39bbcc13.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_Falcon3-8B-Franken-Basestruct/1762652580.138562", - "retrieved_timestamp": "1762652580.1385632", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/Falcon3-8B-Franken-Basestruct", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/Falcon3-8B-Franken-Basestruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17148499315150467 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5462828074770284 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", 
- "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3554895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3946974734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.406 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_Falcon3-MoE-2x7B-Insruct/6b208d1e-96f1-4b72-8d31-6c6e43c42111.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_Falcon3-MoE-2x7B-Insruct/6b208d1e-96f1-4b72-8d31-6c6e43c42111.json deleted file mode 100644 index 6206b3a6c6e6b27c20c7a78f7b16f9b543b359fc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_Falcon3-MoE-2x7B-Insruct/6b208d1e-96f1-4b72-8d31-6c6e43c42111.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_Falcon3-MoE-2x7B-Insruct/1762652580.1388721", - "retrieved_timestamp": "1762652580.138873", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/Falcon3-MoE-2x7B-Insruct", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/Falcon3-MoE-2x7B-Insruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7642954028643998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.564789641564995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4123867069486405 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4840416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40949135638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 13.401 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_SoRu-0009/d45e7b32-f09d-4185-ac78-d0eb7a4d3823.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_SoRu-0009/d45e7b32-f09d-4185-ac78-d0eb7a4d3823.json deleted file mode 100644 index 67da085439791f3e822af1645eeea04f3d10d97c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_SoRu-0009/d45e7b32-f09d-4185-ac78-d0eb7a4d3823.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_SoRu-0009/1762652580.1407459", - "retrieved_timestamp": "1762652580.140747", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/SoRu-0009", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/SoRu-0009" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25818827378023645 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3149981683579724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3369479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12391954787234043 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_della-70b-test-v1/d9f6c1e9-84be-4666-b64f-5da37cf98202.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_della-70b-test-v1/d9f6c1e9-84be-4666-b64f-5da37cf98202.json deleted file mode 100644 index 
bf0d712f769293026c1fd499cea750da5978284e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_della-70b-test-v1/d9f6c1e9-84be-4666-b64f-5da37cf98202.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_della-70b-test-v1/1762652580.141174", - "retrieved_timestamp": "1762652580.141175", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/della-70b-test-v1", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/della-70b-test-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49786566310722213 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3029452113782393 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45545833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1574966755319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_falcon3-ultraset/e2291d7c-7627-484e-a0c1-1857c642be2b.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_falcon3-ultraset/e2291d7c-7627-484e-a0c1-1857c642be2b.json deleted file mode 100644 index 5844410db6766d6e252649a616ce1b0b9e060df5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_falcon3-ultraset/e2291d7c-7627-484e-a0c1-1857c642be2b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_falcon3-ultraset/1762652580.1413918", - "retrieved_timestamp": "1762652580.141393", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/falcon3-ultraset", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/falcon3-ultraset" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7135123694020753 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5583684420918801 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2122356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48531250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.398188164893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fd-lora-merged-16x32/4d00474d-97e6-4384-82f7-956b2e7268e9.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fd-lora-merged-16x32/4d00474d-97e6-4384-82f7-956b2e7268e9.json deleted file mode 100644 index 00b66f4481b3cc7777324050b0732979d6a58c4d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fd-lora-merged-16x32/4d00474d-97e6-4384-82f7-956b2e7268e9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_fd-lora-merged-16x32/1762652580.141611", - "retrieved_timestamp": "1762652580.141612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/fd-lora-merged-16x32", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/fd-lora-merged-16x32" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.3480897352358409 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3307564619842368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35142708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12051196808510638 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.776 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fd-lora-merged-64x128/6474672b-7728-4ab5-8fdf-749e996272a2.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fd-lora-merged-64x128/6474672b-7728-4ab5-8fdf-749e996272a2.json deleted file mode 100644 index 28ae169b101aa65e92bf45774c79fcbaf8139f67..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fd-lora-merged-64x128/6474672b-7728-4ab5-8fdf-749e996272a2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_fd-lora-merged-64x128/1762652580.14183", - "retrieved_timestamp": "1762652580.141831", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/fd-lora-merged-64x128", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/fd-lora-merged-64x128" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3281060918363276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33447107385638297 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18731117824773413 - 
} - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3368229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15367353723404256 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fp4-14b-it-v1/31618256-7ca8-4a3c-bfbf-4397bf2cf339.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fp4-14b-it-v1/31618256-7ca8-4a3c-bfbf-4397bf2cf339.json deleted file mode 100644 index 1027f18540a3c1a02e2513aa438d3f888f70f2ae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fp4-14b-it-v1/31618256-7ca8-4a3c-bfbf-4397bf2cf339.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_fp4-14b-it-v1/1762652580.1420429", - "retrieved_timestamp": "1762652580.1420438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/fp4-14b-it-v1", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/fp4-14b-it-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25346746632269046 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5739715511094247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35948958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4204621010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fp4-14b-v1-fix/37d01a2d-f8ca-46a3-a4b7-3fa725b4023b.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fp4-14b-v1-fix/37d01a2d-f8ca-46a3-a4b7-3fa725b4023b.json deleted file mode 100644 index 48602e406b9dea5a8bc2054d38dd46ac3b8ede56..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fp4-14b-v1-fix/37d01a2d-f8ca-46a3-a4b7-3fa725b4023b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_fp4-14b-v1-fix/1762652580.142252", - "retrieved_timestamp": "1762652580.1422532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/fp4-14b-v1-fix", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/fp4-14b-v1-fix" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6741700909143296 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6817274121032688 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206948640483384 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4531875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5353224734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fq2.5-7b-it-normalize_false/a5004f95-0854-40d2-8a71-004875544499.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fq2.5-7b-it-normalize_false/a5004f95-0854-40d2-8a71-004875544499.json deleted file mode 
100644 index cfd4a01d39f4ac61fa965ce18623f5f83ec3bea1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fq2.5-7b-it-normalize_false/a5004f95-0854-40d2-8a71-004875544499.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_fq2.5-7b-it-normalize_false/1762652580.142459", - "retrieved_timestamp": "1762652580.1424599", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/fq2.5-7b-it-normalize_false", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/fq2.5-7b-it-normalize_false" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7399156460413925 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.551986272150289 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4622356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46115625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44132313829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fq2.5-7b-it-normalize_true/d0d8274c-7d05-4166-a510-487cb294135e.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fq2.5-7b-it-normalize_true/d0d8274c-7d05-4166-a510-487cb294135e.json deleted file mode 100644 index b5e9bfbb83b7a34915ae30ef9f08a136acbac46d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fq2.5-7b-it-normalize_true/d0d8274c-7d05-4166-a510-487cb294135e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_fq2.5-7b-it-normalize_true/1762652580.1426702", - "retrieved_timestamp": "1762652580.142671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/fq2.5-7b-it-normalize_true", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/fq2.5-7b-it-normalize_true" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7399156460413925 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.551986272150289 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4622356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46115625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44132313829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_mllama-3.1-8b-instruct/40016b83-0730-4e67-b7e9-3b1d29d9d1be.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_mllama-3.1-8b-instruct/40016b83-0730-4e67-b7e9-3b1d29d9d1be.json deleted file mode 100644 index 98d813390676b81f83f384ce6320b8c46494febd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_mllama-3.1-8b-instruct/40016b83-0730-4e67-b7e9-3b1d29d9d1be.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_mllama-3.1-8b-instruct/1762652580.143588", - "retrieved_timestamp": "1762652580.143589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/mllama-3.1-8b-instruct", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/mllama-3.1-8b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3457913890698901 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47176616480333583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776435045317221 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.338 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533244680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_moremerge-upscaled/5c465aeb-c6be-4a22-9cf0-3d9c2558ba39.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_moremerge-upscaled/5c465aeb-c6be-4a22-9cf0-3d9c2558ba39.json deleted file mode 100644 index de2e368846a6f4e75821a11f10a009b4ea2a9128..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_moremerge-upscaled/5c465aeb-c6be-4a22-9cf0-3d9c2558ba39.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_moremerge-upscaled/1762652580.144358", - "retrieved_timestamp": "1762652580.1443589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/moremerge-upscaled", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/moremerge-upscaled" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1978882697908217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26977370070980244 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35930208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10413896276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 8.545 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_moremerge/38cf2a56-ed33-4f7e-94aa-bf4f15a5a53c.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_moremerge/38cf2a56-ed33-4f7e-94aa-bf4f15a5a53c.json deleted file mode 100644 index e1a75e5d147d6d2bd627cc445ac9e6c7e7148d4d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_moremerge/38cf2a56-ed33-4f7e-94aa-bf4f15a5a53c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_moremerge/1762652580.1440692", - "retrieved_timestamp": "1762652580.14407", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/moremerge", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/moremerge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20190982149585324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28684447696551024 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35657291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10654920212765957 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_rmoe-v1/e58aecba-3254-426d-aac2-05a32c3cbdab.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_rmoe-v1/e58aecba-3254-426d-aac2-05a32c3cbdab.json deleted file mode 100644 index aec0041ea3019a75590f7e4da0d4003fd84a2ff1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_rmoe-v1/e58aecba-3254-426d-aac2-05a32c3cbdab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_rmoe-v1/1762652580.1453388", - "retrieved_timestamp": "1762652580.14534", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/rmoe-v1", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/rmoe-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26500795666609045 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29292907133609175 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36634374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1124501329787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 11.026 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_rufalcon3-3b-it/8f4336f8-1fdb-4a3d-8b9a-2e7c5e156f07.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_rufalcon3-3b-it/8f4336f8-1fdb-4a3d-8b9a-2e7c5e156f07.json deleted file mode 100644 index 
da62d6188320f38489d920919804f1011c15b0be..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_rufalcon3-3b-it/8f4336f8-1fdb-4a3d-8b9a-2e7c5e156f07.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_rufalcon3-3b-it/1762652580.14555", - "retrieved_timestamp": "1762652580.14555", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/rufalcon3-3b-it", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/rufalcon3-3b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5942111375594533 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41554222543957625 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38953124999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2347905585106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.228 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_testq-32b/d5acc9ed-9fd1-411f-a85c-e790521e7fe4.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_testq-32b/d5acc9ed-9fd1-411f-a85c-e790521e7fe4.json deleted file mode 100644 index afa7e4db708ec26c30e363693d79711eca128c4d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_testq-32b/d5acc9ed-9fd1-411f-a85c-e790521e7fe4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_testq-32b/1762652580.145958", - "retrieved_timestamp": "1762652580.145958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/testq-32b", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/testq-32b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18759668789921852 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2876549792486152 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3714583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11660571808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 56.165 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_tmoe-v2/0a84406f-a970-4a03-8d2f-c82a8bbd3872.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_tmoe-v2/0a84406f-a970-4a03-8d2f-c82a8bbd3872.json deleted file mode 100644 index 3550073e229135bfe52e389016317b9e7b338dbf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_tmoe-v2/0a84406f-a970-4a03-8d2f-c82a8bbd3872.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_tmoe-v2/1762652580.146366", - "retrieved_timestamp": "1762652580.146367", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/tmoe-v2", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/tmoe-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19026959578363187 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy 
on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2896740649804915 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4150833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11003989361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 11.026 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_tmoe/0a160c2d-06ed-43c0-8705-bd76e47c093a.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_tmoe/0a160c2d-06ed-43c0-8705-bd76e47c093a.json deleted file mode 100644 index 36945403771a858dc6b767d305dbbe19214390e3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_tmoe/0a160c2d-06ed-43c0-8705-bd76e47c093a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_tmoe/1762652580.1461592", - "retrieved_timestamp": "1762652580.1461592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/tmoe", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/tmoe" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11930234001338672 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30728601408520645 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2231543624161074 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36990624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11909906914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 11.026 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_trd-7b-it/3bd7f3c1-772a-45fa-9d71-a6e3dff3b54f.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_trd-7b-it/3bd7f3c1-772a-45fa-9d71-a6e3dff3b54f.json deleted file mode 100644 index 7a5e5975b66eb72112e8ed624eb83502a9c72e40..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_trd-7b-it/3bd7f3c1-772a-45fa-9d71-a6e3dff3b54f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_trd-7b-it/1762652580.146566", - "retrieved_timestamp": "1762652580.1465669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/trd-7b-it", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/trd-7b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21847143357402804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2990238931062931 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11785239361702128 - } - } - ], - "additional_details": { - "precision": 
"float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_ud-14b/7e7ffbef-c8d4-47ff-9ae6-7f0701e9e192.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_ud-14b/7e7ffbef-c8d4-47ff-9ae6-7f0701e9e192.json deleted file mode 100644 index 73198648abe7cb5c3773b68b1d3ab2e644ea387a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_ud-14b/7e7ffbef-c8d4-47ff-9ae6-7f0701e9e192.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_ud-14b/1762652580.146786", - "retrieved_timestamp": "1762652580.146786", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/ud-14b", - "developer": "ehristoforu", - "inference_platform": "unknown", - "id": "ehristoforu/ud-14b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4235273518708139 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3323819044961654 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1903323262839879 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23741610738255034 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43942708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24152260638297873 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/elinas/elinas_Chronos-Gold-12B-1.0/4705d82c-514c-48a1-8f87-4d2b8f9aff6b.json b/leaderboard_data/HFOpenLLMv2/elinas/elinas_Chronos-Gold-12B-1.0/4705d82c-514c-48a1-8f87-4d2b8f9aff6b.json deleted file mode 100644 index de8bcee6d77aa81e2d1a4c1e963af67c36892e9a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/elinas/elinas_Chronos-Gold-12B-1.0/4705d82c-514c-48a1-8f87-4d2b8f9aff6b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/elinas_Chronos-Gold-12B-1.0/1762652580.1470149", - "retrieved_timestamp": "1762652580.147016", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "elinas/Chronos-Gold-12B-1.0", - "developer": "elinas", - "inference_platform": "unknown", - "id": "elinas/Chronos-Gold-12B-1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3165656014929277 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5514664110708439 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47398958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.351811835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/euclaise/euclaise_ReMask-3B/a905005d-85fa-44c9-848b-286f9100bab7.json b/leaderboard_data/HFOpenLLMv2/euclaise/euclaise_ReMask-3B/a905005d-85fa-44c9-848b-286f9100bab7.json deleted file mode 100644 index 360ef38facaec3bafbb311a6c67b7f18a1ab47e3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/euclaise/euclaise_ReMask-3B/a905005d-85fa-44c9-848b-286f9100bab7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/euclaise_ReMask-3B/1762652580.14753", - "retrieved_timestamp": "1762652580.147531", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "euclaise/ReMask-3B", - "developer": "euclaise", - "inference_platform": "unknown", - "id": "euclaise/ReMask-3B" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2419269759792905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3516779692917367 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33409375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13572140957446807 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "StableLmForCausalLM", - "params_billions": 2.795 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/eworojoshua/eworojoshua_vas-01/f02ca364-4bf8-4f00-aecc-492ac1f0817a.json b/leaderboard_data/HFOpenLLMv2/eworojoshua/eworojoshua_vas-01/f02ca364-4bf8-4f00-aecc-492ac1f0817a.json deleted file mode 100644 index f3b619c60979746bf5095e225d5cfd4bea42032a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/eworojoshua/eworojoshua_vas-01/f02ca364-4bf8-4f00-aecc-492ac1f0817a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/eworojoshua_vas-01/1762652580.1477718", - "retrieved_timestamp": "1762652580.147773", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "eworojoshua/vas-01", - "developer": "eworojoshua", - "inference_platform": "unknown", - "id": "eworojoshua/vas-01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7612479332615238 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5417819433732887 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4735649546827795 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44323958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4347573138297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-Llama-3.2-3B-Instruct-Reasoning/8bdc63c5-2ed3-4738-8a5c-6b90ba969f99.json b/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-Llama-3.2-3B-Instruct-Reasoning/8bdc63c5-2ed3-4738-8a5c-6b90ba969f99.json deleted file mode 100644 index 9b70b31383de8b39c99500c6ed8a4197e6729a05..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-Llama-3.2-3B-Instruct-Reasoning/8bdc63c5-2ed3-4738-8a5c-6b90ba969f99.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ewre324_Thinker-Llama-3.2-3B-Instruct-Reasoning/1762652580.148031", - "retrieved_timestamp": "1762652580.148032", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning", - "developer": "ewre324", - "inference_platform": "unknown", - "id": "ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44388555698878973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4273125047156003 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36553125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2886469414893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-Qwen2.5-0.5B-Instruct-Reasoning/fe29c3e7-463b-45a1-8377-97e7c7f21874.json b/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-Qwen2.5-0.5B-Instruct-Reasoning/fe29c3e7-463b-45a1-8377-97e7c7f21874.json deleted file mode 100644 index 7b4e0244c15cf9039587a5036d2c72d7d1e5be1b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-Qwen2.5-0.5B-Instruct-Reasoning/fe29c3e7-463b-45a1-8377-97e7c7f21874.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ewre324_Thinker-Qwen2.5-0.5B-Instruct-Reasoning/1762652580.148299", - "retrieved_timestamp": "1762652580.1483", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning", - "developer": "ewre324", - "inference_platform": "unknown", - "id": "ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2476473534665798 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3292122979013761 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33821875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16472739361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - 
"params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-SmolLM2-135M-Instruct-Reasoning/5a03703c-6934-437c-aaca-2acfdd4ca629.json b/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-SmolLM2-135M-Instruct-Reasoning/5a03703c-6934-437c-aaca-2acfdd4ca629.json deleted file mode 100644 index 3a94014a97ba8ce0462d38647eb345d3982a18ce..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-SmolLM2-135M-Instruct-Reasoning/5a03703c-6934-437c-aaca-2acfdd4ca629.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ewre324_Thinker-SmolLM2-135M-Instruct-Reasoning/1762652580.148509", - "retrieved_timestamp": "1762652580.14851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning", - "developer": "ewre324", - "inference_platform": "unknown", - "id": "ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25836336476105626 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3071349750892843 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.109375 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_ewre324-R1-SmolLM2-135M-Distill/6429c440-4d89-4d31-919c-63cde25ba99f.json b/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_ewre324-R1-SmolLM2-135M-Distill/6429c440-4d89-4d31-919c-63cde25ba99f.json deleted file mode 100644 index 2f4e88c8b9bb4a862422ab815dae98b74083aa49..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_ewre324-R1-SmolLM2-135M-Distill/6429c440-4d89-4d31-919c-63cde25ba99f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ewre324_ewre324-R1-SmolLM2-135M-Distill/1762652580.148724", - "retrieved_timestamp": "1762652580.148725", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ewre324/ewre324-R1-SmolLM2-135M-Distill", - "developer": "ewre324", - "inference_platform": "unknown", - "id": "ewre324/ewre324-R1-SmolLM2-135M-Distill" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16489026893088118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3041695757290421 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3409166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11336436170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/experiment-llm/experiment-llm_exp-3-q-r/7d72dcb1-bc5d-41bf-b333-c21e67b0acd2.json b/leaderboard_data/HFOpenLLMv2/experiment-llm/experiment-llm_exp-3-q-r/7d72dcb1-bc5d-41bf-b333-c21e67b0acd2.json deleted file mode 100644 index bb964112887a2dca9bce48ee2ec476575a4bd4c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/experiment-llm/experiment-llm_exp-3-q-r/7d72dcb1-bc5d-41bf-b333-c21e67b0acd2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/experiment-llm_exp-3-q-r/1762652580.148931", - "retrieved_timestamp": "1762652580.148932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "experiment-llm/exp-3-q-r", - "developer": "experiment-llm", - "inference_platform": "unknown", - "id": "experiment-llm/exp-3-q-r" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6035785050333116 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5397159253811645 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27870090634441086 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43154166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43159906914893614 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/facebook/facebook_opt-1.3b/8675526d-af0b-4bf2-b143-123249371076.json b/leaderboard_data/HFOpenLLMv2/facebook/facebook_opt-1.3b/8675526d-af0b-4bf2-b143-123249371076.json deleted file mode 100644 index 3de442eca1b31e205dc4408e6c2f49ce057ba123..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/facebook/facebook_opt-1.3b/8675526d-af0b-4bf2-b143-123249371076.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/facebook_opt-1.3b/1762652580.14919", - "retrieved_timestamp": "1762652580.14919", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "facebook/opt-1.3b", - "developer": "facebook", - "inference_platform": "unknown", - "id": "facebook/opt-1.3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23832985367713222 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3093947052760125 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.342 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11070478723404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "OPTForCausalLM", - "params_billions": 1.3 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/facebook/facebook_opt-30b/1883ddb6-e4cc-4935-81ba-af30af1537e9.json b/leaderboard_data/HFOpenLLMv2/facebook/facebook_opt-30b/1883ddb6-e4cc-4935-81ba-af30af1537e9.json deleted file mode 100644 index 151319769bc718d62460aab8df698b83c0a451d2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/facebook/facebook_opt-30b/1883ddb6-e4cc-4935-81ba-af30af1537e9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/facebook_opt-30b/1762652580.14943", - "retrieved_timestamp": "1762652580.149431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "facebook/opt-30b", - "developer": "facebook", - "inference_platform": "unknown", - "id": "facebook/opt-30b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2452991396162183 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30703447525623373 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", 
- "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36041666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163563829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "OPTForCausalLM", - "params_billions": 30.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Llama-3-8B-Instruct-MopeyMule/f5bfa461-15bf-4e32-8471-74f456c62fd9.json b/leaderboard_data/HFOpenLLMv2/failspy/failspy_Llama-3-8B-Instruct-MopeyMule/f5bfa461-15bf-4e32-8471-74f456c62fd9.json deleted file mode 100644 index 9cc143c153699c7749d2bd9cd4cd420347d4b379..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Llama-3-8B-Instruct-MopeyMule/f5bfa461-15bf-4e32-8471-74f456c62fd9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/failspy_Llama-3-8B-Instruct-MopeyMule/1762652580.1496441", - "retrieved_timestamp": "1762652580.1496441", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "failspy/Llama-3-8B-Instruct-MopeyMule", - "developer": "failspy", - "inference_platform": "unknown", - "id": "failspy/Llama-3-8B-Instruct-MopeyMule" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6750444376476638 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.383874490132152 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35130208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17644614361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Llama-3-8B-Instruct-abliterated/8aa6c90e-a6ee-4dfe-8bf4-b5d256be9cd6.json b/leaderboard_data/HFOpenLLMv2/failspy/failspy_Llama-3-8B-Instruct-abliterated/8aa6c90e-a6ee-4dfe-8bf4-b5d256be9cd6.json deleted file mode 100644 index 43304db70c6a13fbddfa06e9c964ea3483863972..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Llama-3-8B-Instruct-abliterated/8aa6c90e-a6ee-4dfe-8bf4-b5d256be9cd6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/failspy_Llama-3-8B-Instruct-abliterated/1762652580.1499012", - "retrieved_timestamp": "1762652580.149902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "failspy/Llama-3-8B-Instruct-abliterated", - "developer": "failspy", - "inference_platform": "unknown", - "id": "failspy/Llama-3-8B-Instruct-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5908888416069362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4353752684977051 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41158333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2741855053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Meta-Llama-3-70B-Instruct-abliterated-v3.5/e0329607-d832-4252-ad71-81e8a8c4bb31.json b/leaderboard_data/HFOpenLLMv2/failspy/failspy_Meta-Llama-3-70B-Instruct-abliterated-v3.5/e0329607-d832-4252-ad71-81e8a8c4bb31.json deleted file mode 100644 index 31f071b311f83c827bf9e47b76d681ab208cc650..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Meta-Llama-3-70B-Instruct-abliterated-v3.5/e0329607-d832-4252-ad71-81e8a8c4bb31.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/failspy_Meta-Llama-3-70B-Instruct-abliterated-v3.5/1762652580.1501682", - "retrieved_timestamp": "1762652580.1501691", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5", - "developer": "failspy", - "inference_platform": "unknown", - "id": "failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7746867201248244 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.574710022890038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39818749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44522938829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Meta-Llama-3-8B-Instruct-abliterated-v3/c598dbff-4ab5-4405-b75d-13571ae3d862.json b/leaderboard_data/HFOpenLLMv2/failspy/failspy_Meta-Llama-3-8B-Instruct-abliterated-v3/c598dbff-4ab5-4405-b75d-13571ae3d862.json deleted file mode 100644 index 20fce262096ffb79203f0e63c8312c92f6a30d1e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Meta-Llama-3-8B-Instruct-abliterated-v3/c598dbff-4ab5-4405-b75d-13571ae3d862.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/failspy_Meta-Llama-3-8B-Instruct-abliterated-v3/1762652580.150389", - "retrieved_timestamp": "1762652580.15039", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3", - "developer": "failspy", - "inference_platform": "unknown", - "id": "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7244533393617822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4924562150856957 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36218749999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3653590425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Phi-3-medium-4k-instruct-abliterated-v3/264bc4a6-f0ad-4eef-a519-6d97f8f6ab91.json b/leaderboard_data/HFOpenLLMv2/failspy/failspy_Phi-3-medium-4k-instruct-abliterated-v3/264bc4a6-f0ad-4eef-a519-6d97f8f6ab91.json deleted file mode 100644 index f0b20515798b1dba02ad13290d5629b97c07fe87..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Phi-3-medium-4k-instruct-abliterated-v3/264bc4a6-f0ad-4eef-a519-6d97f8f6ab91.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/failspy_Phi-3-medium-4k-instruct-abliterated-v3/1762652580.1505978", - "retrieved_timestamp": "1762652580.150599", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "failspy/Phi-3-medium-4k-instruct-abliterated-v3", - "developer": "failspy", - "inference_platform": "unknown", - "id": "failspy/Phi-3-medium-4k-instruct-abliterated-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6319299458769398 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6304799176474429 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1593655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4604166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4399933510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/failspy/failspy_llama-3-70B-Instruct-abliterated/f31f7ad3-9018-4891-be05-12787728904c.json b/leaderboard_data/HFOpenLLMv2/failspy/failspy_llama-3-70B-Instruct-abliterated/f31f7ad3-9018-4891-be05-12787728904c.json deleted file mode 100644 index 7d7a8c725742421d3f29c04fe526842ab264ae7d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/failspy/failspy_llama-3-70B-Instruct-abliterated/f31f7ad3-9018-4891-be05-12787728904c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/failspy_llama-3-70B-Instruct-abliterated/1762652580.1508029", - "retrieved_timestamp": "1762652580.150804", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "failspy/llama-3-70B-Instruct-abliterated", - "developer": "failspy", - "inference_platform": "unknown", - "id": "failspy/llama-3-70B-Instruct-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8023389052159382 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6464853840398571 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4127604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5145445478723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_TheBeagle-v2beta-32B-MGS/63bdc7e2-6518-4da4-81f4-74aab25f7a5e.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_TheBeagle-v2beta-32B-MGS/63bdc7e2-6518-4da4-81f4-74aab25f7a5e.json deleted file mode 100644 index e23f480a5cc0240691efb418e9241f58c28639fa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_TheBeagle-v2beta-32B-MGS/63bdc7e2-6518-4da4-81f4-74aab25f7a5e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fblgit_TheBeagle-v2beta-32B-MGS/1762652580.1510022", - "retrieved_timestamp": "1762652580.151003", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fblgit/TheBeagle-v2beta-32B-MGS", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/TheBeagle-v2beta-32B-MGS" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.518074265171966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7032634749563558 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4947129909365559 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50075 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5915059840425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_TheBeagle-v2beta-32B-MGS/8338dd8a-88c2-42f8-9d67-13b852e3c0ea.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_TheBeagle-v2beta-32B-MGS/8338dd8a-88c2-42f8-9d67-13b852e3c0ea.json deleted file mode 100644 index bed0fee091f745a0bf5862ebc224e3b1d0d5f640..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_TheBeagle-v2beta-32B-MGS/8338dd8a-88c2-42f8-9d67-13b852e3c0ea.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fblgit_TheBeagle-v2beta-32B-MGS/1762652580.151249", - "retrieved_timestamp": "1762652580.151249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fblgit/TheBeagle-v2beta-32B-MGS", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/TheBeagle-v2beta-32B-MGS" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4503051902285935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.703542441088263 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3942598187311178 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5021145833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5910904255319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-SimpleSmaug-34b-v1beta/f98b051e-0984-423d-89c0-352368168d75.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-SimpleSmaug-34b-v1beta/f98b051e-0984-423d-89c0-352368168d75.json deleted file mode 100644 index 2d03947a2da055eb1dfac6a2eb1a948392955994..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-SimpleSmaug-34b-v1beta/f98b051e-0984-423d-89c0-352368168d75.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fblgit_UNA-SimpleSmaug-34b-v1beta/1762652580.151433", - "retrieved_timestamp": "1762652580.151433", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fblgit/UNA-SimpleSmaug-34b-v1beta", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/UNA-SimpleSmaug-34b-v1beta" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45562551806983254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5286654104993475 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4255625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4539561170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-TheBeagle-7b-v1/454be483-8a45-4bea-a370-5f5a74a924ea.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-TheBeagle-7b-v1/454be483-8a45-4bea-a370-5f5a74a924ea.json deleted file mode 100644 index dc7f95417adbf998d282d04156bf9a5f2de878a4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-TheBeagle-7b-v1/454be483-8a45-4bea-a370-5f5a74a924ea.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fblgit_UNA-TheBeagle-7b-v1/1762652580.151644", - 
"retrieved_timestamp": "1762652580.151645", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fblgit/UNA-TheBeagle-7b-v1", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/UNA-TheBeagle-7b-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36887236975669 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5028691097522866 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4564375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3019448138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-ThePitbull-21.4B-v2/afdf8e40-d87a-4a9c-93a7-a65fe2ae732a.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-ThePitbull-21.4B-v2/afdf8e40-d87a-4a9c-93a7-a65fe2ae732a.json deleted file mode 100644 index d9e243c3e281fb554af852bffa19af8209b7954a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-ThePitbull-21.4B-v2/afdf8e40-d87a-4a9c-93a7-a65fe2ae732a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fblgit_UNA-ThePitbull-21.4B-v2/1762652580.151847", - "retrieved_timestamp": "1762652580.151847", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fblgit/UNA-ThePitbull-21.4B-v2", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/UNA-ThePitbull-21.4B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3790387283518841 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.635038821016254 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3515625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.421 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_cybertron-v4-qw7B-MGS/60ac5509-346d-4717-a729-0413fce4b203.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_cybertron-v4-qw7B-MGS/60ac5509-346d-4717-a729-0413fce4b203.json deleted file mode 100644 index bdc0183722655a1ebf52d66109714cb9f3f38a56..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_cybertron-v4-qw7B-MGS/60ac5509-346d-4717-a729-0413fce4b203.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fblgit_cybertron-v4-qw7B-MGS/1762652580.15205", - "retrieved_timestamp": "1762652580.152051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fblgit/cybertron-v4-qw7B-MGS", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/cybertron-v4-qw7B-MGS" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6263846593704703 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5591772533435835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34894259818731116 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43709375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44730718085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_cybertron-v4-qw7B-UNAMGS/8c73c2a6-b2e9-419d-8c00-8a983790ba9b.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_cybertron-v4-qw7B-UNAMGS/8c73c2a6-b2e9-419d-8c00-8a983790ba9b.json deleted file mode 100644 index 9f135528e0d4ce80c4f4a8aede6304f24caa114b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_cybertron-v4-qw7B-UNAMGS/8c73c2a6-b2e9-419d-8c00-8a983790ba9b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fblgit_cybertron-v4-qw7B-UNAMGS/1762652580.1522481", - "retrieved_timestamp": "1762652580.152249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fblgit/cybertron-v4-qw7B-UNAMGS", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/cybertron-v4-qw7B-UNAMGS" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6090240561709597 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5642509108139038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3731117824773414 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4343333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4500498670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_juanako-7b-UNA/f61e534a-06b4-4558-8ee6-227ad1e97699.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_juanako-7b-UNA/f61e534a-06b4-4558-8ee6-227ad1e97699.json deleted file mode 100644 index 306940a73a5ea4deaa749ecbb0c5363d304178b6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_juanako-7b-UNA/f61e534a-06b4-4558-8ee6-227ad1e97699.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fblgit_juanako-7b-UNA/1762652580.1524491", - "retrieved_timestamp": "1762652580.15245", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fblgit/juanako-7b-UNA", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/juanako-7b-UNA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4837276204914073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.507001145736535 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46449999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.277094414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO/a1d14150-3b2e-489f-8d18-8894862e9ab0.json 
b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO/a1d14150-3b2e-489f-8d18-8894862e9ab0.json deleted file mode 100644 index 039893179fe04bad02ff2d901afca4fc80550eda..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO/a1d14150-3b2e-489f-8d18-8894862e9ab0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO/1762652580.153163", - "retrieved_timestamp": "1762652580.1531641", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fblgit/miniclaus-qw1.5B-UNAMGS-GRPO", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/miniclaus-qw1.5B-UNAMGS-GRPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3518364605912313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.423443453814005 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42543749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2945478723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_miniclaus-qw1.5B-UNAMGS/4b337805-4bd3-4106-bcde-adb7a6fbec23.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_miniclaus-qw1.5B-UNAMGS/4b337805-4bd3-4106-bcde-adb7a6fbec23.json deleted file mode 100644 index 5a98081f1d3fc9a6678f077b5a4eb05a5410731f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_miniclaus-qw1.5B-UNAMGS/4b337805-4bd3-4106-bcde-adb7a6fbec23.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fblgit_miniclaus-qw1.5B-UNAMGS/1762652580.152649", - "retrieved_timestamp": "1762652580.152649", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fblgit/miniclaus-qw1.5B-UNAMGS", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/miniclaus-qw1.5B-UNAMGS" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3348005514257725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4238588294007628 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42934374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2937167553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_pancho-v1-qw25-3B-UNAMGS/701cb3af-8916-47ab-b118-1cd778a23e66.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_pancho-v1-qw25-3B-UNAMGS/701cb3af-8916-47ab-b118-1cd778a23e66.json deleted file mode 100644 index 6bb5c79b98297acff5316d5ae4bc4116f69ea7d8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_pancho-v1-qw25-3B-UNAMGS/701cb3af-8916-47ab-b118-1cd778a23e66.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fblgit_pancho-v1-qw25-3B-UNAMGS/1762652580.153452", - "retrieved_timestamp": "1762652580.153453", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fblgit/pancho-v1-qw25-3B-UNAMGS", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/pancho-v1-qw25-3B-UNAMGS" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.536134124123991 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49258278193390775 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15709969788519637 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4027395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3765791223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.397 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_una-cybertron-7b-v2-bf16/8fc3e145-958b-4f25-bfab-4364bcdfeeb1.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_una-cybertron-7b-v2-bf16/8fc3e145-958b-4f25-bfab-4364bcdfeeb1.json deleted file mode 100644 index bbb435f644dfb73b77d3356e364b6c78091bb07c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_una-cybertron-7b-v2-bf16/8fc3e145-958b-4f25-bfab-4364bcdfeeb1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fblgit_una-cybertron-7b-v2-bf16/1762652580.153698", - "retrieved_timestamp": "1762652580.1536992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fblgit/una-cybertron-7b-v2-bf16", - "developer": "fblgit", - "inference_platform": "unknown", - "id": "fblgit/una-cybertron-7b-v2-bf16" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47371086494944525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3973388920486269 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4473229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2442652925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/fhai50032/fhai50032_RolePlayLake-7B/af85e87f-1308-4968-850a-27382f36a63a.json b/leaderboard_data/HFOpenLLMv2/fhai50032/fhai50032_RolePlayLake-7B/af85e87f-1308-4968-850a-27382f36a63a.json deleted file mode 100644 index 81a70a26423f46b1f8511285dd6622f05cf983b5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fhai50032/fhai50032_RolePlayLake-7B/af85e87f-1308-4968-850a-27382f36a63a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fhai50032_RolePlayLake-7B/1762652580.153994", - "retrieved_timestamp": "1762652580.153995", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fhai50032/RolePlayLake-7B", - "developer": "fhai50032", - "inference_platform": "unknown", - "id": "fhai50032/RolePlayLake-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5056594280952318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5252170095233862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4459270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3159906914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/flammenai/flammenai_flammen15-gutenberg-DPO-v1-7B/1244b8d9-e832-4f2b-8ae5-52449f6ac38c.json b/leaderboard_data/HFOpenLLMv2/flammenai/flammenai_flammen15-gutenberg-DPO-v1-7B/1244b8d9-e832-4f2b-8ae5-52449f6ac38c.json deleted file mode 100644 index a2aabfdf843011d665079dd64c3dd1bcc0f19897..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/flammenai/flammenai_flammen15-gutenberg-DPO-v1-7B/1244b8d9-e832-4f2b-8ae5-52449f6ac38c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/flammenai_flammen15-gutenberg-DPO-v1-7B/1762652580.155953", - "retrieved_timestamp": "1762652580.155954", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "flammenai/flammen15-gutenberg-DPO-v1-7B", - "developer": "flammenai", - "inference_platform": "unknown", - "id": "flammenai/flammen15-gutenberg-DPO-v1-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47980580415519714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5202983979716951 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4293125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3185671542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/fluently-lm/fluently-lm_FluentlyLM-Prinum/950d2518-7245-4ed4-9b16-91f944aa8f15.json b/leaderboard_data/HFOpenLLMv2/fluently-lm/fluently-lm_FluentlyLM-Prinum/950d2518-7245-4ed4-9b16-91f944aa8f15.json deleted file mode 100644 index f2e6cac4f8306d59d4cbf40354045bebef1598ed..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fluently-lm/fluently-lm_FluentlyLM-Prinum/950d2518-7245-4ed4-9b16-91f944aa8f15.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fluently-lm_FluentlyLM-Prinum/1762652580.156252", - "retrieved_timestamp": "1762652580.1562529", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fluently-lm/FluentlyLM-Prinum", - "developer": "fluently-lm", - "inference_platform": "unknown", - "id": "fluently-lm/FluentlyLM-Prinum" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.809033364805383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7143813967889198 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5400302114803626 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44714583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5807845744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/fluently-lm/fluently-lm_Llama-TI-8B-Instruct/47960f3f-b39c-4641-8a94-fb70f9a6a53f.json b/leaderboard_data/HFOpenLLMv2/fluently-lm/fluently-lm_Llama-TI-8B-Instruct/47960f3f-b39c-4641-8a94-fb70f9a6a53f.json deleted file mode 100644 index 64e676ca9865a66964f5eb48de69e2a7d828a10e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fluently-lm/fluently-lm_Llama-TI-8B-Instruct/47960f3f-b39c-4641-8a94-fb70f9a6a53f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/fluently-lm_Llama-TI-8B-Instruct/1762652580.156872", - "retrieved_timestamp": "1762652580.156876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fluently-lm/Llama-TI-8B-Instruct", - "developer": "fluently-lm", - "inference_platform": "unknown", - "id": "fluently-lm/Llama-TI-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7716392505219485 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5252143041749421 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38134375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37258976063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/fluently-sets/fluently-sets_FalconThink3-10B-IT/9329922e-7594-497d-bfab-9c8a18300dc9.json b/leaderboard_data/HFOpenLLMv2/fluently-sets/fluently-sets_FalconThink3-10B-IT/9329922e-7594-497d-bfab-9c8a18300dc9.json deleted file mode 100644 index 24d16ee91df977530a22edd31c9cb8ca147e8c5c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fluently-sets/fluently-sets_FalconThink3-10B-IT/9329922e-7594-497d-bfab-9c8a18300dc9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fluently-sets_FalconThink3-10B-IT/1762652580.1573172", - "retrieved_timestamp": "1762652580.1573179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fluently-sets/FalconThink3-10B-IT", - "developer": "fluently-sets", - 
"inference_platform": "unknown", - "id": "fluently-sets/FalconThink3-10B-IT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7326216660682544 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.620016981648187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24471299093655588 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44788541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4434840425531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/fluently-sets/fluently-sets_reasoning-1-1k-demo/c63fc7e4-87ae-4516-ad3d-df95693133d5.json b/leaderboard_data/HFOpenLLMv2/fluently-sets/fluently-sets_reasoning-1-1k-demo/c63fc7e4-87ae-4516-ad3d-df95693133d5.json deleted file mode 100644 index 7b495e56a8de7501135bc69b03c1d3b5ebc6eac8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/fluently-sets/fluently-sets_reasoning-1-1k-demo/c63fc7e4-87ae-4516-ad3d-df95693133d5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fluently-sets_reasoning-1-1k-demo/1762652580.157624", - "retrieved_timestamp": "1762652580.1576252", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fluently-sets/reasoning-1-1k-demo", - "developer": "fluently-sets", - "inference_platform": "unknown", - "id": "fluently-sets/reasoning-1-1k-demo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7524800861713586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.6396692351083745 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4060625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4773936170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/936751f5-4483-4986-9a8c-cb002feb8858.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/936751f5-4483-4986-9a8c-cb002feb8858.json deleted file mode 100644 index cb70c885d84f4345d09675532e97a11897b261f8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/936751f5-4483-4986-9a8c-cb002feb8858.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/formulae_mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/1762652580.1578538", - "retrieved_timestamp": "1762652580.157855", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16139288199754429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29763925404210967 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4219375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11735372340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.1-7b-2-25-2025/7352f47c-8b57-477f-8190-b08b5b23dfb5.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.1-7b-2-25-2025/7352f47c-8b57-477f-8190-b08b5b23dfb5.json deleted file mode 100644 index 9be7f11d84c45342d57ec5193f83092fd5304ddc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.1-7b-2-25-2025/7352f47c-8b57-477f-8190-b08b5b23dfb5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/formulae_mita-elite-v1.1-7b-2-25-2025/1762652580.158112", - "retrieved_timestamp": "1762652580.158113", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "formulae/mita-elite-v1.1-7b-2-25-2025", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-elite-v1.1-7b-2-25-2025" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1249728498162653 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28673660666639783 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3487291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.10979055851063829 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.1-gen2-7b-2-25-2025/106c33d2-84fb-4ea3-b2d3-78981834fdb0.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.1-gen2-7b-2-25-2025/106c33d2-84fb-4ea3-b2d3-78981834fdb0.json deleted file mode 100644 index faf317995d58f4c085058f8113c848589c9f4112..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.1-gen2-7b-2-25-2025/106c33d2-84fb-4ea3-b2d3-78981834fdb0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/formulae_mita-elite-v1.1-gen2-7b-2-25-2025/1762652580.158336", - "retrieved_timestamp": "1762652580.158336", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "formulae/mita-elite-v1.1-gen2-7b-2-25-2025", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-elite-v1.1-gen2-7b-2-25-2025" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14108454456397912 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.292375183445424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35409375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11012300531914894 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.2-7b-2-26-2025/761560dc-3a0b-481f-8ec2-4d1ea97cfa6f.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.2-7b-2-26-2025/761560dc-3a0b-481f-8ec2-4d1ea97cfa6f.json deleted file mode 100644 index 
aa0ee908553c481278a8046e97ed47e7b096cdfd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.2-7b-2-26-2025/761560dc-3a0b-481f-8ec2-4d1ea97cfa6f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/formulae_mita-elite-v1.2-7b-2-26-2025/1762652580.158752", - "retrieved_timestamp": "1762652580.158756", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "formulae/mita-elite-v1.2-7b-2-26-2025", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-elite-v1.2-7b-2-26-2025" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14800396281865452 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29300480737441686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1186003989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-gen3-7b-2-26-2025/0aa40e02-762d-4a80-932f-f967057c4f50.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-gen3-7b-2-26-2025/0aa40e02-762d-4a80-932f-f967057c4f50.json deleted file mode 100644 index c1e925df3828abac2a78d51d55103e067c821bb4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-gen3-7b-2-26-2025/0aa40e02-762d-4a80-932f-f967057c4f50.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/formulae_mita-gen3-7b-2-26-2025/1762652580.159164", - "retrieved_timestamp": "1762652580.159165", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF 
Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "formulae/mita-gen3-7b-2-26-2025", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-gen3-7b-2-26-2025" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1964144026737944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2915705776174771 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3912083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11236702127659574 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-gen3-v1.2-7b-2-26-2025/a28f8779-d2df-4371-b946-472b335f3ca3.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-gen3-v1.2-7b-2-26-2025/a28f8779-d2df-4371-b946-472b335f3ca3.json deleted file mode 100644 index 5b4f474601a3ac1fc7e4377f86abf530d48c3b46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-gen3-v1.2-7b-2-26-2025/a28f8779-d2df-4371-b946-472b335f3ca3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/formulae_mita-gen3-v1.2-7b-2-26-2025/1762652580.15945", - "retrieved_timestamp": "1762652580.1594508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "formulae/mita-gen3-v1.2-7b-2-26-2025", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-gen3-v1.2-7b-2-26-2025" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.2043577707150361 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30577476935056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38999999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-math-v2.3-2-25-2025/fa005333-c7b5-4494-a8cb-4edb1f7d00b9.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-math-v2.3-2-25-2025/fa005333-c7b5-4494-a8cb-4edb1f7d00b9.json deleted file mode 100644 index 23da59f95a3b4b4afbec847acf46f3c6dc0a3d96..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-math-v2.3-2-25-2025/fa005333-c7b5-4494-a8cb-4edb1f7d00b9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/formulae_mita-math-v2.3-2-25-2025/1762652580.159737", - "retrieved_timestamp": "1762652580.159738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "formulae/mita-math-v2.3-2-25-2025", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-math-v2.3-2-25-2025" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13733781920858879 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2949403673764691 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178523936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1-7b/9c629542-6fd0-4cd1-90c7-7f1e95a7a25e.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1-7b/9c629542-6fd0-4cd1-90c7-7f1e95a7a25e.json deleted file mode 100644 index 432a63d0695dcb1e5a21a650929df2a328eba43a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1-7b/9c629542-6fd0-4cd1-90c7-7f1e95a7a25e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/formulae_mita-v1-7b/1762652580.160087", - "retrieved_timestamp": "1762652580.160088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "formulae/mita-v1-7b", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-v1-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19723888172271792 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3003216459152819 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.002265861027190332 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41520833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1.1-7b-2-24-2025/332cbdd8-96b7-40d5-87c6-3610dcbcdc54.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1.1-7b-2-24-2025/332cbdd8-96b7-40d5-87c6-3610dcbcdc54.json deleted file mode 100644 index 12145041c2ffa3b810100677d53a2edbf071589e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1.1-7b-2-24-2025/332cbdd8-96b7-40d5-87c6-3610dcbcdc54.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/formulae_mita-v1.1-7b-2-24-2025/1762652580.1604211", - "retrieved_timestamp": "1762652580.1604218", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "formulae/mita-v1.1-7b-2-24-2025", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-v1.1-7b-2-24-2025" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34122018466557624 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5442430910797442 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45569791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4523769946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1.2-7b-2-24-2025/a07149d4-66e5-4a0d-b4ae-b696027e821c.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1.2-7b-2-24-2025/a07149d4-66e5-4a0d-b4ae-b696027e821c.json deleted file mode 100644 index 
0d4d97207b3659cb93603ca33fbfa61ac4c7d0ca..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1.2-7b-2-24-2025/a07149d4-66e5-4a0d-b4ae-b696027e821c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/formulae_mita-v1.2-7b-2-24-2025/1762652580.160727", - "retrieved_timestamp": "1762652580.160728", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "formulae/mita-v1.2-7b-2-24-2025", - "developer": "formulae", - "inference_platform": "unknown", - "id": "formulae/mita-v1.2-7b-2-24-2025" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.256415200556745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4919464940215105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4879154078549849 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33585438829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/frameai/frameai_Loxa-4B/b8ac82ef-a231-43ee-aaf2-23b0830cfbc3.json b/leaderboard_data/HFOpenLLMv2/frameai/frameai_Loxa-4B/b8ac82ef-a231-43ee-aaf2-23b0830cfbc3.json deleted file mode 100644 index 7306c5d6242be94297a809614c368599df0adea8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/frameai/frameai_Loxa-4B/b8ac82ef-a231-43ee-aaf2-23b0830cfbc3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/frameai_Loxa-4B/1762652580.160984", - "retrieved_timestamp": "1762652580.160984", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "frameai/Loxa-4B", - "developer": "frameai", - "inference_platform": "unknown", - "id": "frameai/Loxa-4B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47648350820268 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42171373309002896 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1095166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33765625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28016954787234044 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.018 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.1/c2438204-5b2b-41ce-aa95-27afad6f61a9.json b/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.1/c2438204-5b2b-41ce-aa95-27afad6f61a9.json deleted file mode 100644 index bc627056b134fe3b59b8233b50964816573e1c39..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.1/c2438204-5b2b-41ce-aa95-27afad6f61a9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/freewheelin_free-solar-evo-v0.1/1762652580.16175", - "retrieved_timestamp": "1762652580.161752", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "freewheelin/free-solar-evo-v0.1", - "developer": "freewheelin", - "inference_platform": "unknown", - "id": "freewheelin/free-solar-evo-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20500715878313985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4502211109638701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4945833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414228723404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.11/d2180e09-02da-48d2-adf6-1710299b272e.json b/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.11/d2180e09-02da-48d2-adf6-1710299b272e.json deleted file mode 100644 index 98c0bb3750e3394ff6715838ec9a2673d2a3d97c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.11/d2180e09-02da-48d2-adf6-1710299b272e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/freewheelin_free-solar-evo-v0.11/1762652580.1621969", - "retrieved_timestamp": "1762652580.162198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "freewheelin/free-solar-evo-v0.11", - "developer": "freewheelin", - "inference_platform": "unknown", - "id": "freewheelin/free-solar-evo-v0.11" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20265894493277836 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4545155032474882 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy 
on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5052187499999999 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34674202127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.13/6f6887bf-961c-4b6b-a285-a78459a46488.json b/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.13/6f6887bf-961c-4b6b-a285-a78459a46488.json deleted file mode 100644 index f85188ccca6d6a47879420d982b3860e15400daa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.13/6f6887bf-961c-4b6b-a285-a78459a46488.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/freewheelin_free-solar-evo-v0.13/1762652580.1624699", - "retrieved_timestamp": "1762652580.1624708", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "freewheelin/free-solar-evo-v0.13", - "developer": "freewheelin", - "inference_platform": "unknown", - "id": "freewheelin/free-solar-evo-v0.13" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2320598234905606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4554839670962904 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50515625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34699135638297873 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/gabrielmbmb/gabrielmbmb_SmolLM-1.7B-Instruct-IFEval/6e3decae-f2a9-4f71-9511-76d28a675cc2.json b/leaderboard_data/HFOpenLLMv2/gabrielmbmb/gabrielmbmb_SmolLM-1.7B-Instruct-IFEval/6e3decae-f2a9-4f71-9511-76d28a675cc2.json deleted file mode 100644 index 954c27d98be1ace663c8f2e983ceb21ab32e6ccd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/gabrielmbmb/gabrielmbmb_SmolLM-1.7B-Instruct-IFEval/6e3decae-f2a9-4f71-9511-76d28a675cc2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gabrielmbmb_SmolLM-1.7B-Instruct-IFEval/1762652580.162997", - "retrieved_timestamp": "1762652580.162998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gabrielmbmb/SmolLM-1.7B-Instruct-IFEval", - "developer": "gabrielmbmb", - "inference_platform": "unknown", - "id": "gabrielmbmb/SmolLM-1.7B-Instruct-IFEval" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23058595637353335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313843378282092 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33276041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11560837765957446 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/gaverfraxz/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/3666aa17-279d-4f0b-a6c2-2c8198729df9.json 
b/leaderboard_data/HFOpenLLMv2/gaverfraxz/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/3666aa17-279d-4f0b-a6c2-2c8198729df9.json deleted file mode 100644 index 676f5a94b2ca3c8c9c48ff4ee5239a5787a184b2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/gaverfraxz/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/3666aa17-279d-4f0b-a6c2-2c8198729df9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/1762652580.163272", - "retrieved_timestamp": "1762652580.1632729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA", - "developer": "gaverfraxz", - "inference_platform": "unknown", - "id": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40094615619888563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3984844272016949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36504166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16539228723404256 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/gaverfraxz/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/83a638be-6f3d-4d5b-b1de-6515634aebbd.json b/leaderboard_data/HFOpenLLMv2/gaverfraxz/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/83a638be-6f3d-4d5b-b1de-6515634aebbd.json deleted file mode 100644 index 78e4f39ff54c017fcab1b6ca0eca84b9bf41c146..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/gaverfraxz/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/83a638be-6f3d-4d5b-b1de-6515634aebbd.json +++ /dev/null 
@@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/1762652580.163549", - "retrieved_timestamp": "1762652580.16355", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES", - "developer": "gaverfraxz", - "inference_platform": "unknown", - "id": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45505148561372716 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5043660783243713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36785239361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ghost-x/ghost-x_ghost-8b-beta-1608/b5fba89f-ec8f-4e71-ad19-32c7d85698fb.json b/leaderboard_data/HFOpenLLMv2/ghost-x/ghost-x_ghost-8b-beta-1608/b5fba89f-ec8f-4e71-ad19-32c7d85698fb.json deleted file mode 100644 index 9e69add894b2d5a46ca1f8a813f9b63e126447d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ghost-x/ghost-x_ghost-8b-beta-1608/b5fba89f-ec8f-4e71-ad19-32c7d85698fb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ghost-x_ghost-8b-beta-1608/1762652580.16434", - "retrieved_timestamp": "1762652580.164341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - 
"name": "ghost-x/ghost-8b-beta-1608", - "developer": "ghost-x", - "inference_platform": "unknown", - "id": "ghost-x/ghost-8b-beta-1608" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42727407722620425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45165496100352914 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35158333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2839926861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_SahabatAI-MediChatIndo-8B-v1/61543864-320f-41ef-889d-7c0e95a229bd.json b/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_SahabatAI-MediChatIndo-8B-v1/61543864-320f-41ef-889d-7c0e95a229bd.json deleted file mode 100644 index e65ff7a567eaae5a2222d2d938c69d93bff87914..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_SahabatAI-MediChatIndo-8B-v1/61543864-320f-41ef-889d-7c0e95a229bd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gmonsoon_SahabatAI-MediChatIndo-8B-v1/1762652580.165248", - "retrieved_timestamp": "1762652580.165249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gmonsoon/SahabatAI-MediChatIndo-8B-v1", - "developer": "gmonsoon", - "inference_platform": "unknown", - "id": "gmonsoon/SahabatAI-MediChatIndo-8B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41628323958208663 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4508834027881236 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3107546542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_SahabatAI-Rebase-8B-Test/a7daa424-7b22-4320-bddd-be350d54b08d.json b/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_SahabatAI-Rebase-8B-Test/a7daa424-7b22-4320-bddd-be350d54b08d.json deleted file mode 100644 index 09df6239d20e320ec32117322f0956630124212f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_SahabatAI-Rebase-8B-Test/a7daa424-7b22-4320-bddd-be350d54b08d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gmonsoon_SahabatAI-Rebase-8B-Test/1762652580.165493", - "retrieved_timestamp": "1762652580.165493", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gmonsoon/SahabatAI-Rebase-8B-Test", - "developer": "gmonsoon", - "inference_platform": "unknown", - "id": "gmonsoon/SahabatAI-Rebase-8B-Test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5156263159527831 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.522960549734047 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41328125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3663563829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_StockSeaLLMs-7B-v1/ac53d663-0e5c-4a7e-8d9d-efcd70d39b10.json b/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_StockSeaLLMs-7B-v1/ac53d663-0e5c-4a7e-8d9d-efcd70d39b10.json deleted file mode 100644 index 5bc2507c2a056317329069b0b79668b347c612bf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_StockSeaLLMs-7B-v1/ac53d663-0e5c-4a7e-8d9d-efcd70d39b10.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gmonsoon_StockSeaLLMs-7B-v1/1762652580.165695", - "retrieved_timestamp": "1762652580.165696", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gmonsoon/StockSeaLLMs-7B-v1", - "developer": "gmonsoon", - "inference_platform": "unknown", - "id": "gmonsoon/StockSeaLLMs-7B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4599218961245052 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5271087932535433 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.421375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39519614361702127 - } - } - ], - 
"additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_gemma2-9b-sahabatai-v1-instruct-BaseTIES/6d500e75-5605-4268-88a1-dc4abc7c5a7f.json b/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_gemma2-9b-sahabatai-v1-instruct-BaseTIES/6d500e75-5605-4268-88a1-dc4abc7c5a7f.json deleted file mode 100644 index e80d81d20eff3724f3bd2d7dc821162aa35cdaad..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_gemma2-9b-sahabatai-v1-instruct-BaseTIES/6d500e75-5605-4268-88a1-dc4abc7c5a7f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gmonsoon_gemma2-9b-sahabatai-v1-instruct-BaseTIES/1762652580.165903", - "retrieved_timestamp": "1762652580.1659038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES", - "developer": "gmonsoon", - "inference_platform": "unknown", - "id": "gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7377923908562614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6077244532441547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19939577039274925 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47780208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43467420212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_full_2/3c550631-c27c-4743-98f3-3ab65c5fa906.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_full_2/3c550631-c27c-4743-98f3-3ab65c5fa906.json deleted file mode 100644 index 
0f3614ebc66f8afa1b3b8c39ca14b919831bf7f8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_full_2/3c550631-c27c-4743-98f3-3ab65c5fa906.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_full_2/1762652580.166118", - "retrieved_timestamp": "1762652580.166118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_full_2", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_full_2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31781450994472443 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4216953430035033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40515625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.285405585106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_full_3B/d7d6baf0-00d3-4960-970c-949bb9919ac9.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_full_3B/d7d6baf0-00d3-4960-970c-949bb9919ac9.json deleted file mode 100644 index 4d89ce685ccf42397421c5d7c11ff2ce005c3658..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_full_3B/d7d6baf0-00d3-4960-970c-949bb9919ac9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_full_3B/1762652580.166356", - "retrieved_timestamp": "1762652580.166357", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_full_3B", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_full_3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36957162550920447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46841893776834337 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4954791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.335688164893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_max_2600/017ca821-f6ea-43bc-bac1-28dd30c2341d.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_max_2600/017ca821-f6ea-43bc-bac1-28dd30c2341d.json deleted file mode 100644 index 82eaff22a493b312d11a767b7e17c4b62f27de55..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_max_2600/017ca821-f6ea-43bc-bac1-28dd30c2341d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_max_2600/1762652580.16661", - "retrieved_timestamp": "1762652580.166613", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ifd_max_2600", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_ifd_max_2600" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3042504997850149 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40285133876405865 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3508645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29163896276595747 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_max_2600_3B/41d72b83-3c55-460f-9d21-88866eed6b9a.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_max_2600_3B/41d72b83-3c55-460f-9d21-88866eed6b9a.json deleted file mode 100644 index 67d4f12fea3f02156ac8b32de005fb7e13dabef3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_max_2600_3B/41d72b83-3c55-460f-9d21-88866eed6b9a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_max_2600_3B/1762652580.1669528", - "retrieved_timestamp": "1762652580.166954", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ifd_max_2600_3B", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_ifd_max_2600_3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.298155560579263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4626377955326701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.1593655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43455208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32878989361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_me_max_5200/e2f13357-053c-42e5-8149-465b4f16d334.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_me_max_5200/e2f13357-053c-42e5-8149-465b4f16d334.json deleted file mode 100644 index ef312cdf869e24d2b19b65af2f61eaff12a00311..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_me_max_5200/e2f13357-053c-42e5-8149-465b4f16d334.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_me_max_5200/1762652580.167201", - "retrieved_timestamp": "1762652580.167202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ifd_me_max_5200", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_ifd_me_max_5200" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36832271705740766 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4153453015610935 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.3482604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29820478723404253 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_min_2600/5561b7bd-bd90-445c-b969-8d400e99e629.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_min_2600/5561b7bd-bd90-445c-b969-8d400e99e629.json deleted file mode 100644 index 112fd0348d6e86c69c439f1bb3652d3527d082d6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_min_2600/5561b7bd-bd90-445c-b969-8d400e99e629.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_min_2600/1762652580.167441", - "retrieved_timestamp": "1762652580.167443", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ifd_min_2600", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_ifd_min_2600" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3749673089624419 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4219047173013076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36562500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.289311835106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_ans_max_5200/9c2cee8b-3f35-4a49-814e-ad316fcede7f.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_ans_max_5200/9c2cee8b-3f35-4a49-814e-ad316fcede7f.json deleted file mode 100644 index fcc43fe134c3587eaa3393e1f2a8fe78e44baf45..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_ans_max_5200/9c2cee8b-3f35-4a49-814e-ad316fcede7f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_ans_max_5200/1762652580.167691", - "retrieved_timestamp": "1762652580.1676931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ins_ans_max_5200", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_ins_ans_max_5200" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34786477657061043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40982060224148426 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3601666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2900598404255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_max_5200/cdd1de41-4e85-4872-be9f-e3af4e9221a9.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_max_5200/cdd1de41-4e85-4872-be9f-e3af4e9221a9.json deleted file mode 100644 index 5f09f888b5aff912800153a18729a6d3fbca7193..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_max_5200/cdd1de41-4e85-4872-be9f-e3af4e9221a9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_max_5200/1762652580.1679769", - "retrieved_timestamp": "1762652580.167978", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ins_max_5200", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_ins_max_5200" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32750657145263457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41550742328078477 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.361375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2915558510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_min_2600/121f28df-65d6-4a48-aa77-4ee794034032.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_min_2600/121f28df-65d6-4a48-aa77-4ee794034032.json deleted file mode 100644 index acaab98030218f910a63e613099efb061ba341ca..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_min_2600/121f28df-65d6-4a48-aa77-4ee794034032.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_min_2600/1762652580.1682088", - "retrieved_timestamp": "1762652580.16821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ins_min_2600", - "developer": 
"godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_ins_min_2600" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33300199027469335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41873469888886056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38534375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28798204787234044 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_min_5200/d976888b-5e17-4e5c-b557-0b48bf36d4f7.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_min_5200/d976888b-5e17-4e5c-b557-0b48bf36d4f7.json deleted file mode 100644 index 0be2526ebd6a8bc2dbaa324c40d58e9c81650225..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_min_5200/d976888b-5e17-4e5c-b557-0b48bf36d4f7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_min_5200/1762652580.1684108", - "retrieved_timestamp": "1762652580.1684108", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_ins_min_5200", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_ins_min_5200" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3359995921931586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4289279419241076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39055208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29488031914893614 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_sampled_ifd_5200/e7ca66f4-852b-4b5b-8781-d6272a43c559.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_sampled_ifd_5200/e7ca66f4-852b-4b5b-8781-d6272a43c559.json deleted file mode 100644 index c106b86217e361ef245c888c6533c1db8928bdc2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_sampled_ifd_5200/e7ca66f4-852b-4b5b-8781-d6272a43c559.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_sampled_ifd_5200/1762652580.1686149", - "retrieved_timestamp": "1762652580.1686149", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_sampled_ifd_5200", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_sampled_ifd_5200" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2923853154075631 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4032969715626326 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3520729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2896442819148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_sampled_ifd_new_5200/906db90c-7ea4-4878-aa01-06fd1ad0d18a.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_sampled_ifd_new_5200/906db90c-7ea4-4878-aa01-06fd1ad0d18a.json deleted file mode 100644 index f0dec574729b3655f788361c0c969c4a09e01812..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_sampled_ifd_new_5200/906db90c-7ea4-4878-aa01-06fd1ad0d18a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_sampled_ifd_new_5200/1762652580.1688168", - "retrieved_timestamp": "1762652580.168818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_sampled_ifd_new_5200", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_sampled_ifd_new_5200" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36632468516868577 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4177831234050982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29247007978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.1_2600/08195b61-5fe5-4cce-8da4-34b731289278.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.1_2600/08195b61-5fe5-4cce-8da4-34b731289278.json deleted file mode 100644 index d4344835f39491397d211ed3834ebeb56487c382..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.1_2600/08195b61-5fe5-4cce-8da4-34b731289278.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_0.1_2600/1762652580.1691651", - "retrieved_timestamp": "1762652580.169167", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_score_max_0.1_2600", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_score_max_0.1_2600" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3287554799044313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42522607952607777 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37064583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29230385638297873 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.3_2600/40e4c93e-7a54-49c2-b513-33edd87f59b0.json 
b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.3_2600/40e4c93e-7a54-49c2-b513-33edd87f59b0.json deleted file mode 100644 index d1f4fe128847e0269ed60fda530047e1dfea170d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.3_2600/40e4c93e-7a54-49c2-b513-33edd87f59b0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_0.3_2600/1762652580.1694138", - "retrieved_timestamp": "1762652580.169415", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_score_max_0.3_2600", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_score_max_0.3_2600" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33752332699459653 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4151448369012765 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37594791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29130651595744683 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.7_2600/988c6ec3-e967-4cec-993b-e060a5a18e97.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.7_2600/988c6ec3-e967-4cec-993b-e060a5a18e97.json deleted file mode 100644 index ab82067f5cbcb6138dab5fc9f1157dcfdb0c9336..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.7_2600/988c6ec3-e967-4cec-993b-e060a5a18e97.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/godlikehhd_alpaca_data_score_max_0.7_2600/1762652580.169624", - "retrieved_timestamp": "1762652580.169625", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_score_max_0.7_2600", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_score_max_0.7_2600" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3639764713183243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41845266250678703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3468645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2982878989361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_2500/b6fd288d-36d5-4499-bf2d-da1fdd1120c5.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_2500/b6fd288d-36d5-4499-bf2d-da1fdd1120c5.json deleted file mode 100644 index 40267fce6d8cc4d2a644329218fe277307ac923c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_2500/b6fd288d-36d5-4499-bf2d-da1fdd1120c5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_2500/1762652580.1698968", - "retrieved_timestamp": "1762652580.169898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_score_max_2500", - 
"developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_score_max_2500" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3563577973111345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41801375075895447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36270833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2939660904255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_2600_3B/92dc5ec0-5aea-45f5-9237-32b5a65e095b.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_2600_3B/92dc5ec0-5aea-45f5-9237-32b5a65e095b.json deleted file mode 100644 index 3a2bc19ad25aa94f72bc1b7afa62bbac565ca639..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_2600_3B/92dc5ec0-5aea-45f5-9237-32b5a65e095b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_2600_3B/1762652580.170121", - "retrieved_timestamp": "1762652580.170122", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_score_max_2600_3B", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_score_max_2600_3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33577463352792813 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4716306839273412 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44744791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341921542553192 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_5200/d877dbd4-b3da-44b5-974a-1267db396435.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_5200/d877dbd4-b3da-44b5-974a-1267db396435.json deleted file mode 100644 index b4e353fd2aceab0d77dd190f2e3a0ae9ae206c73..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_5200/d877dbd4-b3da-44b5-974a-1267db396435.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_5200/1762652580.170327", - "retrieved_timestamp": "1762652580.170327", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "godlikehhd/alpaca_data_score_max_5200", - "developer": "godlikehhd", - "inference_platform": "unknown", - "id": "godlikehhd/alpaca_data_score_max_5200" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34454248061809334 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42417102847687554 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3877916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446476063829785 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/AALF_gemma-2-27b-it-SimPO-37K-100steps/214ebe7f-357a-435c-9bf5-451bdea1ca9a.json b/leaderboard_data/HFOpenLLMv2/google/AALF_gemma-2-27b-it-SimPO-37K-100steps/214ebe7f-357a-435c-9bf5-451bdea1ca9a.json deleted file mode 100644 index 3d5393da256807f9c43ea1f90b4c8eca30fce1cf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/AALF_gemma-2-27b-it-SimPO-37K-100steps/214ebe7f-357a-435c-9bf5-451bdea1ca9a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AALF_gemma-2-27b-it-SimPO-37K-100steps/1762652579.472713", - "retrieved_timestamp": "1762652579.472714", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AALF/gemma-2-27b-it-SimPO-37K-100steps", - "developer": "google", - "inference_platform": "unknown", - "id": "AALF/gemma-2-27b-it-SimPO-37K-100steps" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2567642743476199 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39308230769885016 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3329166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21251662234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/AALF_gemma-2-27b-it-SimPO-37K/878ec84b-a365-4887-b7fd-1dc738f6eda8.json b/leaderboard_data/HFOpenLLMv2/google/AALF_gemma-2-27b-it-SimPO-37K/878ec84b-a365-4887-b7fd-1dc738f6eda8.json deleted file mode 100644 index 6b04094170c7adef40bf07ee3ea501e2ccff7b0d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/AALF_gemma-2-27b-it-SimPO-37K/878ec84b-a365-4887-b7fd-1dc738f6eda8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AALF_gemma-2-27b-it-SimPO-37K/1762652579.472391", - "retrieved_timestamp": "1762652579.4723918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AALF/gemma-2-27b-it-SimPO-37K", - "developer": "google", - "inference_platform": "unknown", - "id": "AALF/gemma-2-27b-it-SimPO-37K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24065257959990605 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3911343917952534 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3487604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1971409574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/AELLM_gemma-2-aeria-infinity-9b/93d08946-76b5-4547-8bf0-966c5cccd8c1.json b/leaderboard_data/HFOpenLLMv2/google/AELLM_gemma-2-aeria-infinity-9b/93d08946-76b5-4547-8bf0-966c5cccd8c1.json deleted file mode 100644 index 64916bbd45694586ead2b5936d8fe5172e2e9149..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/google/AELLM_gemma-2-aeria-infinity-9b/93d08946-76b5-4547-8bf0-966c5cccd8c1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AELLM_gemma-2-aeria-infinity-9b/1762652579.4729412", - "retrieved_timestamp": "1762652579.472942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AELLM/gemma-2-aeria-infinity-9b", - "developer": "google", - "inference_platform": "unknown", - "id": "AELLM/gemma-2-aeria-infinity-9b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.759399504426034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5983336669577649 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40196875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38622007978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/AELLM_gemma-2-lyco-infinity-9b/fa16a47e-4009-487b-8252-1fef155ce6b4.json b/leaderboard_data/HFOpenLLMv2/google/AELLM_gemma-2-lyco-infinity-9b/fa16a47e-4009-487b-8252-1fef155ce6b4.json deleted file mode 100644 index 6a00de2f8127765c9ccb3b46d99012738fcd7601..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/AELLM_gemma-2-lyco-infinity-9b/fa16a47e-4009-487b-8252-1fef155ce6b4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AELLM_gemma-2-lyco-infinity-9b/1762652579.473207", - "retrieved_timestamp": "1762652579.473208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AELLM/gemma-2-lyco-infinity-9b", - "developer": "google", - "inference_platform": "unknown", - "id": "AELLM/gemma-2-lyco-infinity-9b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7316475839660989 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5839534871023703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40063541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.378656914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/Aashraf995_Gemma-Evo-10B/15b910c7-6c36-4af8-af78-d48278dbc4db.json b/leaderboard_data/HFOpenLLMv2/google/Aashraf995_Gemma-Evo-10B/15b910c7-6c36-4af8-af78-d48278dbc4db.json deleted file mode 100644 index ee316bac2aa616ed2f71bc17734d7a3e101c0b62..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/Aashraf995_Gemma-Evo-10B/15b910c7-6c36-4af8-af78-d48278dbc4db.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Aashraf995_Gemma-Evo-10B/1762652579.476305", - "retrieved_timestamp": "1762652579.476305", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Aashraf995/Gemma-Evo-10B", - "developer": "google", - "inference_platform": "unknown", - "id": "Aashraf995/Gemma-Evo-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7332211864519476 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6044352897552882 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22280966767371602 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45947916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4275265957446808 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/BAAI_Gemma2-9B-IT-Simpo-Infinity-Preference/0f948238-5ed2-41ee-a815-3ff20728de89.json b/leaderboard_data/HFOpenLLMv2/google/BAAI_Gemma2-9B-IT-Simpo-Infinity-Preference/0f948238-5ed2-41ee-a815-3ff20728de89.json deleted file mode 100644 index 4e6900f5ff45d35187e7485dc5897e8599cebe42..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/BAAI_Gemma2-9B-IT-Simpo-Infinity-Preference/0f948238-5ed2-41ee-a815-3ff20728de89.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BAAI_Gemma2-9B-IT-Simpo-Infinity-Preference/1762652579.487571", - "retrieved_timestamp": "1762652579.487571", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference", - "developer": "google", - "inference_platform": "unknown", - "id": "BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31763831079314 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5979459664230056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39657291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868849734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/BlackBeenie_Neos-Gemma-2-9b/ea9ebbaa-fb04-491d-adc2-0389cb5d1ef6.json b/leaderboard_data/HFOpenLLMv2/google/BlackBeenie_Neos-Gemma-2-9b/ea9ebbaa-fb04-491d-adc2-0389cb5d1ef6.json deleted file mode 100644 index 155351adf699881a50894e56817bf4d24132e5c4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/BlackBeenie_Neos-Gemma-2-9b/ea9ebbaa-fb04-491d-adc2-0389cb5d1ef6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BlackBeenie_Neos-Gemma-2-9b/1762652579.4958751", - "retrieved_timestamp": "1762652579.495876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BlackBeenie/Neos-Gemma-2-9b", - "developer": "google", - "inference_platform": "unknown", - "id": "BlackBeenie/Neos-Gemma-2-9b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5875665456544192 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5502975126048852 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36175 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.39810505319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/Columbia-NLP_LION-Gemma-2b-odpo-v1.0/25418041-6fe1-4cd8-88cb-79456a65210c.json b/leaderboard_data/HFOpenLLMv2/google/Columbia-NLP_LION-Gemma-2b-odpo-v1.0/25418041-6fe1-4cd8-88cb-79456a65210c.json deleted file mode 100644 index 7caf7c97ccbda5bb361dbed14c1f0efa1f8c25a0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/Columbia-NLP_LION-Gemma-2b-odpo-v1.0/25418041-6fe1-4cd8-88cb-79456a65210c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-odpo-v1.0/1762652579.507273", - "retrieved_timestamp": "1762652579.507273", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Columbia-NLP/LION-Gemma-2b-odpo-v1.0", - "developer": "google", - "inference_platform": "unknown", - "id": "Columbia-NLP/LION-Gemma-2b-odpo-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30664858131978706 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3895836210706875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42791666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1692154255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-9B/a639bba5-4d0e-4d0b-826a-3eb4d0ccebab.json b/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-9B/a639bba5-4d0e-4d0b-826a-3eb4d0ccebab.json deleted file mode 100644 index 6327cb5efb667284be1fdc72c6174cd0453ae704..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-9B/a639bba5-4d0e-4d0b-826a-3eb4d0ccebab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-9B/1762652579.539702", - "retrieved_timestamp": "1762652579.5397062", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/Gemma-The-Writer-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "DavidAU/Gemma-The-Writer-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17403156956874427 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5905439384199537 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.409875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39793882978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-DEADLINE-10B/66d2e2a4-a75c-4fb9-af6a-3181f17281af.json b/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-DEADLINE-10B/66d2e2a4-a75c-4fb9-af6a-3181f17281af.json deleted file mode 100644 index 483a78277385b8cce8072ab1faf599cd881154bc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-DEADLINE-10B/66d2e2a4-a75c-4fb9-af6a-3181f17281af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-DEADLINE-10B/1762652579.5400288", - "retrieved_timestamp": "1762652579.54003", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging 
Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/Gemma-The-Writer-DEADLINE-10B", - "developer": "google", - "inference_platform": "unknown", - "id": "DavidAU/Gemma-The-Writer-DEADLINE-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23315802071836061 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5896087932535433 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4188645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39461436170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.952 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-J.GutenBerg-10B/3d1cef14-ea09-45ca-a92c-a1fe7a05ce8b.json b/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-J.GutenBerg-10B/3d1cef14-ea09-45ca-a92c-a1fe7a05ce8b.json deleted file mode 100644 index 339312494e1ebd21c7e17246b68d63239393b1ae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-J.GutenBerg-10B/3d1cef14-ea09-45ca-a92c-a1fe7a05ce8b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-J.GutenBerg-10B/1762652579.5402539", - "retrieved_timestamp": "1762652579.540255", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/Gemma-The-Writer-J.GutenBerg-10B", - "developer": "google", - "inference_platform": "unknown", - "id": "DavidAU/Gemma-The-Writer-J.GutenBerg-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28578948301617485 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5909421265868766 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41759375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3946974734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.034 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-Mighty-Sword-9B/a403d91c-4f30-4d05-9f00-24ce97cc91ac.json b/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-Mighty-Sword-9B/a403d91c-4f30-4d05-9f00-24ce97cc91ac.json deleted file mode 100644 index 8d03495e92476363d567bd844b15484966c025dd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-Mighty-Sword-9B/a403d91c-4f30-4d05-9f00-24ce97cc91ac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-Mighty-Sword-9B/1762652579.540473", - "retrieved_timestamp": "1762652579.5404742", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/Gemma-The-Writer-Mighty-Sword-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "DavidAU/Gemma-The-Writer-Mighty-Sword-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527549125209998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5911959785635329 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19108761329305135 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34815436241610737 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4111770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39677526595744683 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/b708a2a6-d738-48a9-9c20-0838bdb19646.json b/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/b708a2a6-d738-48a9-9c20-0838bdb19646.json deleted file mode 100644 index 8e81e365a7e90cab5aa4cfcf6bbbf3c885d2eb76..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/b708a2a6-d738-48a9-9c20-0838bdb19646.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/1762652579.540709", - "retrieved_timestamp": "1762652579.54071", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored", - "developer": "google", - "inference_platform": "unknown", - "id": "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7070927361622716 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5922294775018883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.41632291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3966090425531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.034 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI2_Athene-codegemma-2-7b-it-alpaca-v1.2/ea4bffba-6e14-4380-a060-2b4deb6d94c0.json b/leaderboard_data/HFOpenLLMv2/google/EpistemeAI2_Athene-codegemma-2-7b-it-alpaca-v1.2/ea4bffba-6e14-4380-a060-2b4deb6d94c0.json deleted file mode 100644 index a9d11bdcc26f3055618ffac6b7ffd9b2fda744b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI2_Athene-codegemma-2-7b-it-alpaca-v1.2/ea4bffba-6e14-4380-a060-2b4deb6d94c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Athene-codegemma-2-7b-it-alpaca-v1.2/1762652579.609552", - "retrieved_timestamp": "1762652579.6095529", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2", - "developer": "google", - "inference_platform": "unknown", - "id": "EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4351177098986245 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41754154460978427 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41696875000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22972074468085107 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athena-gemma-2-2b-it-Philos/21096485-ff49-4481-a530-48746334fceb.json b/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athena-gemma-2-2b-it-Philos/21096485-ff49-4481-a530-48746334fceb.json deleted file mode 100644 index 0a91c9bdac86ea9ce5346520d14c0d401ed04b60..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athena-gemma-2-2b-it-Philos/21096485-ff49-4481-a530-48746334fceb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Athena-gemma-2-2b-it-Philos/1762652579.598697", - "retrieved_timestamp": "1762652579.598698", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Athena-gemma-2-2b-it-Philos", - "developer": "google", - "inference_platform": "unknown", - "id": "EpistemeAI/Athena-gemma-2-2b-it-Philos" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620950189940469 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37947768790586744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43136458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22481715425531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athena-gemma-2-2b-it/a0ca047c-97c2-4ba1-84a7-ba0b00ba6d25.json b/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athena-gemma-2-2b-it/a0ca047c-97c2-4ba1-84a7-ba0b00ba6d25.json deleted file mode 100644 index d97646627f4881a09af4c4517b6f9f58d14f94f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athena-gemma-2-2b-it/a0ca047c-97c2-4ba1-84a7-ba0b00ba6d25.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/EpistemeAI_Athena-gemma-2-2b-it/1762652579.598221", - "retrieved_timestamp": "1762652579.598221", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Athena-gemma-2-2b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "EpistemeAI/Athena-gemma-2-2b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3134172883504657 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42642293591146 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43505208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2421875 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athene-codegemma-2-7b-it-alpaca-v1.3/c05e106e-203a-49e7-b656-22809ac16037.json b/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athene-codegemma-2-7b-it-alpaca-v1.3/c05e106e-203a-49e7-b656-22809ac16037.json deleted file mode 100644 index a4bc5815456b8e27fee16e71bbc5cc7574b21270..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athene-codegemma-2-7b-it-alpaca-v1.3/c05e106e-203a-49e7-b656-22809ac16037.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Athene-codegemma-2-7b-it-alpaca-v1.3/1762652579.598942", - "retrieved_timestamp": "1762652579.598943", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3", - "developer": 
"google", - "inference_platform": "unknown", - "id": "EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40299405577201824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4331916189482215 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4503020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25872672872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GemmaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaOrca-2-Merged/d4bb122a-87b4-482e-8050-7c1716a4ed5b.json b/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaOrca-2-Merged/d4bb122a-87b4-482e-8050-7c1716a4ed5b.json deleted file mode 100644 index 0cb75fba0f0655203f71a937c8dd1272af918f8f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaOrca-2-Merged/d4bb122a-87b4-482e-8050-7c1716a4ed5b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/GenVRadmin_AryaBhatta-GemmaOrca-2-Merged/1762652579.627253", - "retrieved_timestamp": "1762652579.627253", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "GenVRadmin/AryaBhatta-GemmaOrca-2-Merged", - "developer": "google", - "inference_platform": "unknown", - "id": "GenVRadmin/AryaBhatta-GemmaOrca-2-Merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30637375497014585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3887493166323577 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4550208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23844747340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaOrca-Merged/179d4baf-7da1-4a56-82e7-35ea45204e13.json b/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaOrca-Merged/179d4baf-7da1-4a56-82e7-35ea45204e13.json deleted file mode 100644 index d687f2a3eafd8c3db8253d6d1b4670387ce95ac5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaOrca-Merged/179d4baf-7da1-4a56-82e7-35ea45204e13.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/GenVRadmin_AryaBhatta-GemmaOrca-Merged/1762652579.627504", - "retrieved_timestamp": "1762652579.6275048", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "GenVRadmin/AryaBhatta-GemmaOrca-Merged", - "developer": "google", - "inference_platform": "unknown", - "id": "GenVRadmin/AryaBhatta-GemmaOrca-Merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30637375497014585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4130633897394575 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22282247340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaUltra-Merged/4aca90c3-b0c0-4ec6-ba6b-0d5b09ef63fe.json b/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaUltra-Merged/4aca90c3-b0c0-4ec6-ba6b-0d5b09ef63fe.json deleted file mode 100644 index 4b4b3b1af72816428340a68c0518631138b8baa9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaUltra-Merged/4aca90c3-b0c0-4ec6-ba6b-0d5b09ef63fe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/GenVRadmin_AryaBhatta-GemmaUltra-Merged/1762652579.627715", - "retrieved_timestamp": "1762652579.627716", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "GenVRadmin/AryaBhatta-GemmaUltra-Merged", - "developer": "google", - "inference_platform": "unknown", - "id": "GenVRadmin/AryaBhatta-GemmaUltra-Merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30207737691547315 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4141445378464817 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25335570469798663 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42785416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.2265625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge-PEFT/7891a95c-8d95-4181-96e8-cdc2f6ab538b.json b/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge-PEFT/7891a95c-8d95-4181-96e8-cdc2f6ab538b.json deleted file mode 100644 index 75c9626babc284f0feb2056be985c15b43c8ca30..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge-PEFT/7891a95c-8d95-4181-96e8-cdc2f6ab538b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Gunulhona_Gemma-Ko-Merge-PEFT/1762652579.635783", - "retrieved_timestamp": "1762652579.635786", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Gunulhona/Gemma-Ko-Merge-PEFT", - "developer": "google", - "inference_platform": "unknown", - "id": "Gunulhona/Gemma-Ko-Merge-PEFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4441348954108433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4862989687822461 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3985833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3097573138297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 20.318 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge-PEFT/f9fb4008-db4e-4a84-b12b-050bdf35084f.json b/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge-PEFT/f9fb4008-db4e-4a84-b12b-050bdf35084f.json deleted file mode 100644 index d11cecad0c2923d067dcb5107fe66d5aed95b01e..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge-PEFT/f9fb4008-db4e-4a84-b12b-050bdf35084f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Gunulhona_Gemma-Ko-Merge-PEFT/1762652579.635457", - "retrieved_timestamp": "1762652579.635457", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Gunulhona/Gemma-Ko-Merge-PEFT", - "developer": "google", - "inference_platform": "unknown", - "id": "Gunulhona/Gemma-Ko-Merge-PEFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28803906966847964 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5154093999781059 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40801041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38173204787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 20.318 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge/dccf426d-63bb-4298-958f-d1f4776f03b2.json b/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge/dccf426d-63bb-4298-958f-d1f4776f03b2.json deleted file mode 100644 index f2c993c0e0d2c5869316d481a1e2a32a2ec110c6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge/dccf426d-63bb-4298-958f-d1f4776f03b2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Gunulhona_Gemma-Ko-Merge/1762652579.635146", - "retrieved_timestamp": "1762652579.635147", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - 
"model_info": { - "name": "Gunulhona/Gemma-Ko-Merge", - "developer": "google", - "inference_platform": "unknown", - "id": "Gunulhona/Gemma-Ko-Merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6415721397004392 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5813027258981727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18806646525679757 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40469791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3878823138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/HuggingFaceH4_zephyr-7b-gemma-v0.1/dcf4d2bb-ee8f-4083-baf6-8870731515fa.json b/leaderboard_data/HFOpenLLMv2/google/HuggingFaceH4_zephyr-7b-gemma-v0.1/dcf4d2bb-ee8f-4083-baf6-8870731515fa.json deleted file mode 100644 index a7f0ce45a526a5c173cbd4e12b78f490656b0d49..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/HuggingFaceH4_zephyr-7b-gemma-v0.1/dcf4d2bb-ee8f-4083-baf6-8870731515fa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-7b-gemma-v0.1/1762652579.641236", - "retrieved_timestamp": "1762652579.641237", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HuggingFaceH4/zephyr-7b-gemma-v0.1", - "developer": "google", - "inference_platform": "unknown", - "id": "HuggingFaceH4/zephyr-7b-gemma-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3363741539116212 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4623735014679749 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37396874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2847406914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/INSAIT-Institute_BgGPT-Gemma-2-27B-IT-v1.0/51d4db96-4c38-464a-9e7f-0ade67699c8d.json b/leaderboard_data/HFOpenLLMv2/google/INSAIT-Institute_BgGPT-Gemma-2-27B-IT-v1.0/51d4db96-4c38-464a-9e7f-0ade67699c8d.json deleted file mode 100644 index 788ac4539f06e8ec705eb938ec5739f239253de9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/INSAIT-Institute_BgGPT-Gemma-2-27B-IT-v1.0/51d4db96-4c38-464a-9e7f-0ade67699c8d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/INSAIT-Institute_BgGPT-Gemma-2-27B-IT-v1.0/1762652579.645844", - "retrieved_timestamp": "1762652579.645845", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0", - "developer": "google", - "inference_platform": "unknown", - "id": "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11668882978723404 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/IlyaGusev_gemma-2-2b-it-abliterated/e3ee4f00-1037-4da7-96e2-934b5ccefd15.json b/leaderboard_data/HFOpenLLMv2/google/IlyaGusev_gemma-2-2b-it-abliterated/e3ee4f00-1037-4da7-96e2-934b5ccefd15.json deleted file mode 100644 index c0fe2e3ab347befe9ecfe07f0fb7e8d8453dc83a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/IlyaGusev_gemma-2-2b-it-abliterated/e3ee4f00-1037-4da7-96e2-934b5ccefd15.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/IlyaGusev_gemma-2-2b-it-abliterated/1762652579.646105", - "retrieved_timestamp": "1762652579.646106", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "IlyaGusev/gemma-2-2b-it-abliterated", - "developer": "google", - "inference_platform": "unknown", - "id": "IlyaGusev/gemma-2-2b-it-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.533086654521115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4118601326211988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37818749999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.25382313829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/IlyaGusev_gemma-2-9b-it-abliterated/8a81c9e6-1c72-46f6-98c6-0d3b28ba5633.json b/leaderboard_data/HFOpenLLMv2/google/IlyaGusev_gemma-2-9b-it-abliterated/8a81c9e6-1c72-46f6-98c6-0d3b28ba5633.json deleted file mode 100644 index 1f924626597d933a5000f07456860b7224b2cdc0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/IlyaGusev_gemma-2-9b-it-abliterated/8a81c9e6-1c72-46f6-98c6-0d3b28ba5633.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/IlyaGusev_gemma-2-9b-it-abliterated/1762652579.646349", - "retrieved_timestamp": "1762652579.6463501", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "IlyaGusev/gemma-2-9b-it-abliterated", - "developer": "google", - "inference_platform": "unknown", - "id": "IlyaGusev/gemma-2-9b-it-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.747259493698941 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.59063299776093 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4033645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39153922872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/LenguajeNaturalAI_leniachat-gemma-2b-v0/af954640-6806-4e4c-9c0b-b81215eadfc8.json b/leaderboard_data/HFOpenLLMv2/google/LenguajeNaturalAI_leniachat-gemma-2b-v0/af954640-6806-4e4c-9c0b-b81215eadfc8.json deleted file mode 100644 index 55638238d2105ebf6f4c6bb57457247637de6c75..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/google/LenguajeNaturalAI_leniachat-gemma-2b-v0/af954640-6806-4e4c-9c0b-b81215eadfc8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LenguajeNaturalAI_leniachat-gemma-2b-v0/1762652579.7101068", - "retrieved_timestamp": "1762652579.7101078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LenguajeNaturalAI/leniachat-gemma-2b-v0", - "developer": "google", - "inference_platform": "unknown", - "id": "LenguajeNaturalAI/leniachat-gemma-2b-v0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21497404664069114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30740211895412034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36590625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11702127659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/ModelSpace_GemmaX2-28-9B-v0.1/6cb560eb-08f5-4430-8797-1116f1d2f56c.json b/leaderboard_data/HFOpenLLMv2/google/ModelSpace_GemmaX2-28-9B-v0.1/6cb560eb-08f5-4430-8797-1116f1d2f56c.json deleted file mode 100644 index 2450df08c50c775933facec73040904b349cdaa1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/ModelSpace_GemmaX2-28-9B-v0.1/6cb560eb-08f5-4430-8797-1116f1d2f56c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ModelSpace_GemmaX2-28-9B-v0.1/1762652579.76179", - "retrieved_timestamp": "1762652579.761791", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ModelSpace/GemmaX2-28-9B-v0.1", - "developer": "google", - "inference_platform": "unknown", - "id": "ModelSpace/GemmaX2-28-9B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.003921816336210145 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3687226427280163 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35365625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2230718085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/NAPS-ai_naps-gemma-2-27b-v-0.1.0/8768f068-452f-4a54-bddb-9f6cffaf5a19.json b/leaderboard_data/HFOpenLLMv2/google/NAPS-ai_naps-gemma-2-27b-v-0.1.0/8768f068-452f-4a54-bddb-9f6cffaf5a19.json deleted file mode 100644 index 33ab1875915786d8ae9fe8b889eaadca3e827eac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/NAPS-ai_naps-gemma-2-27b-v-0.1.0/8768f068-452f-4a54-bddb-9f6cffaf5a19.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-gemma-2-27b-v-0.1.0/1762652579.7653928", - "retrieved_timestamp": "1762652579.765394", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NAPS-ai/naps-gemma-2-27b-v-0.1.0", - "developer": "google", - "inference_platform": "unknown", - "id": "NAPS-ai/naps-gemma-2-27b-v-0.1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11677194148936171 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/NAPS-ai_naps-gemma-2-27b-v0.1.0/b004d154-392d-4f31-afbb-547b058996bd.json b/leaderboard_data/HFOpenLLMv2/google/NAPS-ai_naps-gemma-2-27b-v0.1.0/b004d154-392d-4f31-afbb-547b058996bd.json deleted file mode 100644 index 33ae17d7ec6123b6f59fc32d9c37611faff1eb84..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/NAPS-ai_naps-gemma-2-27b-v0.1.0/b004d154-392d-4f31-afbb-547b058996bd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-gemma-2-27b-v0.1.0/1762652579.765648", - "retrieved_timestamp": "1762652579.7656488", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NAPS-ai/naps-gemma-2-27b-v0.1.0", - "developer": "google", - "inference_platform": "unknown", - "id": "NAPS-ai/naps-gemma-2-27b-v0.1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11677194148936171 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/SaisExperiments_Gemma-2-2B-Stheno-Filtered/16070acb-e8bb-476a-b5aa-863a85cb0aee.json b/leaderboard_data/HFOpenLLMv2/google/SaisExperiments_Gemma-2-2B-Stheno-Filtered/16070acb-e8bb-476a-b5aa-863a85cb0aee.json deleted file mode 100644 index 548808c804b6030de4b1d83e3e7f868dde1b2b33..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/SaisExperiments_Gemma-2-2B-Stheno-Filtered/16070acb-e8bb-476a-b5aa-863a85cb0aee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SaisExperiments_Gemma-2-2B-Stheno-Filtered/1762652579.855671", - "retrieved_timestamp": "1762652579.8556721", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SaisExperiments/Gemma-2-2B-Stheno-Filtered", - "developer": "google", - "inference_platform": "unknown", - "id": "SaisExperiments/Gemma-2-2B-Stheno-Filtered" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4196554032190144 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4149234152222183 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40029166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.2629654255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/Skywork_Skywork-Reward-Gemma-2-27B-v0.2/140b0661-2961-46f3-8c75-cb75147e0acc.json b/leaderboard_data/HFOpenLLMv2/google/Skywork_Skywork-Reward-Gemma-2-27B-v0.2/140b0661-2961-46f3-8c75-cb75147e0acc.json deleted file mode 100644 index 6e0fa4ae54967fb0a1ad5211a7b0d466689906b8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/Skywork_Skywork-Reward-Gemma-2-27B-v0.2/140b0661-2961-46f3-8c75-cb75147e0acc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Skywork_Skywork-Reward-Gemma-2-27B-v0.2/1762652579.8884969", - "retrieved_timestamp": "1762652579.8884978", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Skywork/Skywork-Reward-Gemma-2-27B-v0.2", - "developer": "google", - "inference_platform": "unknown", - "id": "Skywork/Skywork-Reward-Gemma-2-27B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7807317916461656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.635960062329604 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22734138972809667 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42314583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4103224734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForSequenceClassification", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/Sorawiz_Gemma-9B-Base/246e4c1f-016c-411e-870e-9ade63713daa.json b/leaderboard_data/HFOpenLLMv2/google/Sorawiz_Gemma-9B-Base/246e4c1f-016c-411e-870e-9ade63713daa.json deleted file mode 100644 index 
0acd66faaf2ab7f7588dd83aa8cad09a1cbc8215..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/Sorawiz_Gemma-9B-Base/246e4c1f-016c-411e-870e-9ade63713daa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sorawiz_Gemma-9B-Base/1762652579.8897338", - "retrieved_timestamp": "1762652579.889735", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sorawiz/Gemma-9B-Base", - "developer": "google", - "inference_platform": "unknown", - "id": "Sorawiz/Gemma-9B-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16673758959560633 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.593040577894583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40451041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42353723404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/Sorawiz_Gemma-Creative-9B-Base/26229a4f-9f53-453f-9899-77808040f8cb.json b/leaderboard_data/HFOpenLLMv2/google/Sorawiz_Gemma-Creative-9B-Base/26229a4f-9f53-453f-9899-77808040f8cb.json deleted file mode 100644 index 27be9020e1b145eb7a9b973c58bd55b3e23fa589..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/Sorawiz_Gemma-Creative-9B-Base/26229a4f-9f53-453f-9899-77808040f8cb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sorawiz_Gemma-Creative-9B-Base/1762652579.890075", - "retrieved_timestamp": "1762652579.890076", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sorawiz/Gemma-Creative-9B-Base", - "developer": "google", - "inference_platform": "unknown", - "id": "Sorawiz/Gemma-Creative-9B-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1515002415812267 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5458614335095562 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4007646276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/Supichi_BBAI_135_Gemma/64cd00af-6782-431b-aac1-445e39d56717.json b/leaderboard_data/HFOpenLLMv2/google/Supichi_BBAI_135_Gemma/64cd00af-6782-431b-aac1-445e39d56717.json deleted file mode 100644 index 17ef9004a03966a53b4cc640fd08e98e96b9791f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/Supichi_BBAI_135_Gemma/64cd00af-6782-431b-aac1-445e39d56717.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Supichi_BBAI_135_Gemma/1762652579.8946822", - "retrieved_timestamp": "1762652579.894683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Supichi/BBAI_135_Gemma", - "developer": "google", - "inference_platform": "unknown", - "id": "Supichi/BBAI_135_Gemma" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06562144000141845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy 
on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35684129093449685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38047916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16722074468085107 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 19.3 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Gemmasutra-9B-v1/3f7a68f4-e456-4ecf-8a5f-1f3698822a89.json b/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Gemmasutra-9B-v1/3f7a68f4-e456-4ecf-8a5f-1f3698822a89.json deleted file mode 100644 index 1340a4252d63decb5994a28af0bc1a1eb4ee98ae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Gemmasutra-9B-v1/3f7a68f4-e456-4ecf-8a5f-1f3698822a89.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheDrummer_Gemmasutra-9B-v1/1762652579.9140742", - "retrieved_timestamp": "1762652579.914075", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheDrummer/Gemmasutra-9B-v1", - "developer": "google", - "inference_platform": "unknown", - "id": "TheDrummer/Gemmasutra-9B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24155130609006326 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5886914248369671 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48459375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4045046542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Gemmasutra-Mini-2B-v1/3c066bd3-ec6c-412d-86a1-759c228610b9.json b/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Gemmasutra-Mini-2B-v1/3c066bd3-ec6c-412d-86a1-759c228610b9.json deleted file mode 100644 index ae84f36302a412f264c1ace0e70685fe829c2c57..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Gemmasutra-Mini-2B-v1/3c066bd3-ec6c-412d-86a1-759c228610b9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheDrummer_Gemmasutra-Mini-2B-v1/1762652579.914318", - "retrieved_timestamp": "1762652579.914319", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheDrummer/Gemmasutra-Mini-2B-v1", - "developer": "google", - "inference_platform": "unknown", - "id": "TheDrummer/Gemmasutra-Mini-2B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25486597782771936 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35750190791471836 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3489791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20545212765957446 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v1/7b093f59-7a4e-4e72-b9a6-7d10870917ea.json b/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v1/7b093f59-7a4e-4e72-b9a6-7d10870917ea.json deleted file mode 100644 index 0bcb644f9a5d78d6523999605260420a4770e1fd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v1/7b093f59-7a4e-4e72-b9a6-7d10870917ea.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheDrummer_Tiger-Gemma-9B-v1/1762652579.915312", - "retrieved_timestamp": "1762652579.915313", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheDrummer/Tiger-Gemma-9B-v1", - "developer": "google", - "inference_platform": "unknown", - "id": "TheDrummer/Tiger-Gemma-9B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.728150197032762 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5703687739329574 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18353474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41616666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41181848404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v2/962205b9-009a-4201-b382-5143c80e78ce.json b/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v2/962205b9-009a-4201-b382-5143c80e78ce.json deleted file mode 100644 index f376d5dd70bd42eba8d2ac6f432a7106b117b6a4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v2/962205b9-009a-4201-b382-5143c80e78ce.json +++ /dev/null 
@@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheDrummer_Tiger-Gemma-9B-v2/1762652579.915529", - "retrieved_timestamp": "1762652579.91553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheDrummer/Tiger-Gemma-9B-v2", - "developer": "google", - "inference_platform": "unknown", - "id": "TheDrummer/Tiger-Gemma-9B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6985997154217476 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5617191114121779 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40841666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41123670212765956 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v3/6fbfd3ba-e28a-4e9d-be12-e04b6d50b9ee.json b/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v3/6fbfd3ba-e28a-4e9d-be12-e04b6d50b9ee.json deleted file mode 100644 index 5822da3931f847e3932d33634a878cad29e7b149..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v3/6fbfd3ba-e28a-4e9d-be12-e04b6d50b9ee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheDrummer_Tiger-Gemma-9B-v3/1762652579.915734", - "retrieved_timestamp": "1762652579.915734", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheDrummer/Tiger-Gemma-9B-v3", - "developer": "google", - 
"inference_platform": "unknown", - "id": "TheDrummer/Tiger-Gemma-9B-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6820635912711606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5812231557853248 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1623867069486405 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4003541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40591755319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/Triangle104_Gemmadevi-Stock-10B/153fd43a-fe54-4a99-98dd-5420f2bf8b66.json b/leaderboard_data/HFOpenLLMv2/google/Triangle104_Gemmadevi-Stock-10B/153fd43a-fe54-4a99-98dd-5420f2bf8b66.json deleted file mode 100644 index cb7771fb2f5543fbf7453f461bd58549a447b6c1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/Triangle104_Gemmadevi-Stock-10B/153fd43a-fe54-4a99-98dd-5420f2bf8b66.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Gemmadevi-Stock-10B/1762652579.9249291", - "retrieved_timestamp": "1762652579.9249291", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Gemmadevi-Stock-10B", - "developer": "google", - "inference_platform": "unknown", - "id": "Triangle104/Gemmadevi-Stock-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15819470117067158 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.6065922684184144 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46211458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4261968085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter1/687769ed-44e9-4f3d-aee6-2dc4e98dd7ee.json b/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter1/687769ed-44e9-4f3d-aee6-2dc4e98dd7ee.json deleted file mode 100644 index e1fd6426e67ce31fd849d294719f386c07afe3fc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter1/687769ed-44e9-4f3d-aee6-2dc4e98dd7ee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter1/1762652579.936019", - "retrieved_timestamp": "1762652579.93602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1", - "developer": "google", - "inference_platform": "unknown", - "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.308221075634871 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5968934762705508 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4099375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39070811170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter2/fa584f01-69eb-4ecc-9f0d-049b6bfb05c8.json b/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter2/fa584f01-69eb-4ecc-9f0d-049b6bfb05c8.json deleted file mode 100644 index 9444e2da916649cd8781824675707d851550d3ed..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter2/fa584f01-69eb-4ecc-9f0d-049b6bfb05c8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter2/1762652579.936279", - "retrieved_timestamp": "1762652579.93628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2", - "developer": "google", - "inference_platform": "unknown", - "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3100196367859502 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5989880877421281 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4139375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.386968085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter3/f318d457-d295-4447-9222-0b0d92708b5d.json b/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter3/f318d457-d295-4447-9222-0b0d92708b5d.json deleted file mode 100644 index 658860b0110513b164b7b2487c2a1cbb90b76610..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter3/f318d457-d295-4447-9222-0b0d92708b5d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter3/1762652579.9364889", - "retrieved_timestamp": "1762652579.93649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3", - "developer": "google", - "inference_platform": "unknown", - "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31671409637539505 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6007080229268026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41660416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.382563164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-Gemma-2b/b002a274-9b4f-40ad-b0c7-e4efabbe431f.json b/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-Gemma-2b/b002a274-9b4f-40ad-b0c7-e4efabbe431f.json deleted file mode 100644 index 328d48174f4ac968e1ded0421e5b7a609af782d2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-Gemma-2b/b002a274-9b4f-40ad-b0c7-e4efabbe431f.json +++ /dev/null @@ -1,107 +0,0 
@@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Gemma-2b/1762652579.941349", - "retrieved_timestamp": "1762652579.94135", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-Gemma-2b", - "developer": "google", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-Gemma-2b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24752213017017072 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3416315376053174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3675833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14685837765957446 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-Gemma-7b/e66f4326-2585-4581-b45f-d9a81fb1576c.json b/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-Gemma-7b/e66f4326-2585-4581-b45f-d9a81fb1576c.json deleted file mode 100644 index f8d978b0a0d6548c7cfa610a3d7d93307c664048..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-Gemma-7b/e66f4326-2585-4581-b45f-d9a81fb1576c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Gemma-7b/1762652579.9415941", - "retrieved_timestamp": "1762652579.9415948", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"VAGOsolutions/SauerkrautLM-Gemma-7b", - "developer": "google", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-Gemma-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3406705319662939 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41879127895858687 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35942708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961269946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-gemma-2-2b-it/b010858c-edb5-4e49-b5b6-72b06943ab2c.json b/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-gemma-2-2b-it/b010858c-edb5-4e49-b5b6-72b06943ab2c.json deleted file mode 100644 index be8967233ad1495cd1dcdcca7dc864840829e62c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-gemma-2-2b-it/b010858c-edb5-4e49-b5b6-72b06943ab2c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-gemma-2-2b-it/1762652579.9427688", - "retrieved_timestamp": "1762652579.94277", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-gemma-2-2b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-gemma-2-2b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13206625088099574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42408371860644856 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3994583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.269281914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-gemma-2-9b-it/5395cbac-afe0-4936-b4eb-f554fcb5be75.json b/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-gemma-2-9b-it/5395cbac-afe0-4936-b4eb-f554fcb5be75.json deleted file mode 100644 index b182fc0eaa87f0224ad56a81d01b67e9c494ac35..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-gemma-2-9b-it/5395cbac-afe0-4936-b4eb-f554fcb5be75.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-gemma-2-9b-it/1762652579.94298", - "retrieved_timestamp": "1762652579.942981", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-gemma-2-9b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-gemma-2-9b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3024009627787604 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6072645787154746 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43182291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40907579787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/Youlln_4PRYMMAL-GEMMA2-9B-SLERP/06b75d54-4d17-4116-a4d5-0917eedb2dc4.json b/leaderboard_data/HFOpenLLMv2/google/Youlln_4PRYMMAL-GEMMA2-9B-SLERP/06b75d54-4d17-4116-a4d5-0917eedb2dc4.json deleted file mode 100644 index 3e84cf7881b4e16ae561d1214a1b32e2b613f863..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/Youlln_4PRYMMAL-GEMMA2-9B-SLERP/06b75d54-4d17-4116-a4d5-0917eedb2dc4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_4PRYMMAL-GEMMA2-9B-SLERP/1762652579.961175", - "retrieved_timestamp": "1762652579.9611762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/4PRYMMAL-GEMMA2-9B-SLERP", - "developer": "google", - "inference_platform": "unknown", - "id": "Youlln/4PRYMMAL-GEMMA2-9B-SLERP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2713766140507188 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5922529923998928 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46719791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.42096077127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/ZHLiu627_zephyr-7b-gemma-rpo-avg/6333359d-1cf7-4905-9a48-f8a8f7b46ed2.json b/leaderboard_data/HFOpenLLMv2/google/ZHLiu627_zephyr-7b-gemma-rpo-avg/6333359d-1cf7-4905-9a48-f8a8f7b46ed2.json deleted file mode 100644 index 5af0f8c5a56d96fd0fb0934cc242cbab69e9256d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/ZHLiu627_zephyr-7b-gemma-rpo-avg/6333359d-1cf7-4905-9a48-f8a8f7b46ed2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ZHLiu627_zephyr-7b-gemma-rpo-avg/1762652579.9660559", - "retrieved_timestamp": "1762652579.966057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ZHLiu627/zephyr-7b-gemma-rpo-avg", - "developer": "google", - "inference_platform": "unknown", - "id": "ZHLiu627/zephyr-7b-gemma-rpo-avg" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30060350979844586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41832761356743015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40810416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2830784574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/agentlans_Gemma2-9B-AdvancedFuse/3bcdf1ca-ad29-45cf-ac97-6bc508981545.json b/leaderboard_data/HFOpenLLMv2/google/agentlans_Gemma2-9B-AdvancedFuse/3bcdf1ca-ad29-45cf-ac97-6bc508981545.json deleted file mode 100644 index 4b1006c544785c34c85c743063047bbaaad8897c..0000000000000000000000000000000000000000 
--- a/leaderboard_data/HFOpenLLMv2/google/agentlans_Gemma2-9B-AdvancedFuse/3bcdf1ca-ad29-45cf-ac97-6bc508981545.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/agentlans_Gemma2-9B-AdvancedFuse/1762652579.975734", - "retrieved_timestamp": "1762652579.975735", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "agentlans/Gemma2-9B-AdvancedFuse", - "developer": "google", - "inference_platform": "unknown", - "id": "agentlans/Gemma2-9B-AdvancedFuse" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15427288483446144 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.585936684475517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10045317220543806 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4230833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4000166223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp1-2.6B/e52ac657-26a3-499a-949f-bf2a0b620d8e.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp1-2.6B/e52ac657-26a3-499a-949f-bf2a0b620d8e.json deleted file mode 100644 index 6419fe7b765590a839081b8a1c4b2d864fd332c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp1-2.6B/e52ac657-26a3-499a-949f-bf2a0b620d8e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp1-2.6B/1762652579.985875", - "retrieved_timestamp": "1762652579.985876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Gemma2Slerp1-2.6B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/Gemma2Slerp1-2.6B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5354348683714766 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343094462630086 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45616666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26886635638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp1-27B/42d79295-bdb0-411d-b1b0-5cff954e925c.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp1-27B/42d79295-bdb0-411d-b1b0-5cff954e925c.json deleted file mode 100644 index 2418f5dead3335d02ca4f815e8b807570befe565..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp1-27B/42d79295-bdb0-411d-b1b0-5cff954e925c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp1-27B/1762652579.986121", - "retrieved_timestamp": "1762652579.986122", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Gemma2Slerp1-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/Gemma2Slerp1-27B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7186332265056716 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6398902146527521 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2583081570996979 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640939597315436 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47671875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44564494680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp2-2.6B/eeb46285-0c8d-43b7-9b6d-e86c24064fde.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp2-2.6B/eeb46285-0c8d-43b7-9b6d-e86c24064fde.json deleted file mode 100644 index b3c302fb0bdf13effaca96ea338602ad6c43d29b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp2-2.6B/eeb46285-0c8d-43b7-9b6d-e86c24064fde.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp2-2.6B/1762652579.98633", - "retrieved_timestamp": "1762652579.98633", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Gemma2Slerp2-2.6B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/Gemma2Slerp2-2.6B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5747272791748117 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4307646783089521 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44677083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26961436170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp2-27B/1f2c33e8-2d7b-4bd5-81e8-1c9bcae0ae8f.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp2-27B/1f2c33e8-2d7b-4bd5-81e8-1c9bcae0ae8f.json deleted file mode 100644 index 1ba58c357fe7fa4c882f7916eb9e83f1df8cf0c0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp2-27B/1f2c33e8-2d7b-4bd5-81e8-1c9bcae0ae8f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp2-27B/1762652579.986531", - "retrieved_timestamp": "1762652579.9865322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Gemma2Slerp2-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/Gemma2Slerp2-27B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7545534736720789 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6557274121032689 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27870090634441086 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46208333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46226728723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp3-27B/648810d4-4dd5-48c7-a4d7-b3d9d2f3f3f2.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp3-27B/648810d4-4dd5-48c7-a4d7-b3d9d2f3f3f2.json deleted file mode 100644 index b71b1c59948163ef6886d0215db9cbdb1b741c63..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp3-27B/648810d4-4dd5-48c7-a4d7-b3d9d2f3f3f2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp3-27B/1762652579.986752", - "retrieved_timestamp": "1762652579.986753", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Gemma2Slerp3-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/Gemma2Slerp3-27B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7426384216102164 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6499638721230724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27416918429003023 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47402083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4640957446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp4-27B/f94f3bf1-cf85-4673-a5cf-368f250233e4.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp4-27B/f94f3bf1-cf85-4673-a5cf-368f250233e4.json deleted file mode 100644 index 
0489aac8e03f95058bbd622ebca6df01b3260cd9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp4-27B/f94f3bf1-cf85-4673-a5cf-368f250233e4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp4-27B/1762652579.986965", - "retrieved_timestamp": "1762652579.9869661", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Gemma2Slerp4-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/Gemma2Slerp4-27B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7496575752337131 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6529581339749019 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2719033232628399 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36661073825503354 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4502395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46492686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp-9B/3aed9fd2-45bd-4568-8885-7fc2370bb26d.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp-9B/3aed9fd2-45bd-4568-8885-7fc2370bb26d.json deleted file mode 100644 index 3870f37d2f9b1147bff65a50b4d8d8287aa1a2b3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp-9B/3aed9fd2-45bd-4568-8885-7fc2370bb26d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp-9B/1762652579.987181", - "retrieved_timestamp": "1762652579.9871821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/GemmaSlerp-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/GemmaSlerp-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.704320092909037 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.592057786577488 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21601208459214502 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46732291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41605718085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp2-9B/99333370-c7d5-4763-b3a4-14adde0fab9e.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp2-9B/99333370-c7d5-4763-b3a4-14adde0fab9e.json deleted file mode 100644 index b1fecde55af05fd2567b8f5d170a18a7f8005b93..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp2-9B/99333370-c7d5-4763-b3a4-14adde0fab9e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp2-9B/1762652579.987394", - "retrieved_timestamp": "1762652579.987395", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/GemmaSlerp2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/GemmaSlerp2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7281003293483512 
- } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.598271299766216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2107250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47671875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42386968085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp4-10B/32e38c82-d412-4888-9d9d-f89aef0989fd.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp4-10B/32e38c82-d412-4888-9d9d-f89aef0989fd.json deleted file mode 100644 index 17b69a9ea799db2064ec23a7e9c1a6c12fde7f6b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp4-10B/32e38c82-d412-4888-9d9d-f89aef0989fd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp4-10B/1762652579.9875991", - "retrieved_timestamp": "1762652579.9875998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/GemmaSlerp4-10B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/GemmaSlerp4-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7326216660682544 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6027862253440982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45398958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4250332446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp5-10B/e325b56f-4306-4e37-adc5-c09b300a8c30.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp5-10B/e325b56f-4306-4e37-adc5-c09b300a8c30.json deleted file mode 100644 index 4ab0552c7f767ef0f7484dfae1cc33b1efaa9a0c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp5-10B/e325b56f-4306-4e37-adc5-c09b300a8c30.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp5-10B/1762652579.9878101", - "retrieved_timestamp": "1762652579.987811", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/GemmaSlerp5-10B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/GemmaSlerp5-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7353444416370785 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.605447654436423 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46078125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4328457446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaStock1-27B/0b19d8bb-1952-4515-8d29-e55e1106e92b.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaStock1-27B/0b19d8bb-1952-4515-8d29-e55e1106e92b.json deleted file mode 100644 index f4e64aa817dcd5224c865e99d93c6cccc461af81..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaStock1-27B/0b19d8bb-1952-4515-8d29-e55e1106e92b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaStock1-27B/1762652579.9880252", - "retrieved_timestamp": "1762652579.9880252", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/GemmaStock1-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "allknowingroger/GemmaStock1-27B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7509064836855099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6565607454366021 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.263595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640939597315436 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45268749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47298869680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/anakin87_gemma-2b-orpo/80531a18-00d3-4264-bf84-cd1d4d90df08.json b/leaderboard_data/HFOpenLLMv2/google/anakin87_gemma-2b-orpo/80531a18-00d3-4264-bf84-cd1d4d90df08.json deleted file mode 100644 index 
57ed13a5478a892aaab4c3da4a05a24917869978..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/anakin87_gemma-2b-orpo/80531a18-00d3-4264-bf84-cd1d4d90df08.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/anakin87_gemma-2b-orpo/1762652580.010973", - "retrieved_timestamp": "1762652580.010974", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "anakin87/gemma-2b-orpo", - "developer": "google", - "inference_platform": "unknown", - "id": "anakin87/gemma-2b-orpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24779695651981187 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34261709435617754 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37276041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1305684840425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/anthracite-org_magnum-v3-9b-customgemma2/865b86aa-7b8d-4619-aa57-3c57cc4c7b51.json b/leaderboard_data/HFOpenLLMv2/google/anthracite-org_magnum-v3-9b-customgemma2/865b86aa-7b8d-4619-aa57-3c57cc4c7b51.json deleted file mode 100644 index 13953df147fcc05db141dd159e39ca486e06bfff..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/anthracite-org_magnum-v3-9b-customgemma2/865b86aa-7b8d-4619-aa57-3c57cc4c7b51.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-9b-customgemma2/1762652580.012768", - "retrieved_timestamp": "1762652580.012769", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "anthracite-org/magnum-v3-9b-customgemma2", - "developer": "google", - "inference_platform": "unknown", - "id": "anthracite-org/magnum-v3-9b-customgemma2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1272955757390391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5340136936916174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45646875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4204621010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/beomi_gemma-mling-7b/2568a2b7-e95c-4224-9850-5816466b50f2.json b/leaderboard_data/HFOpenLLMv2/google/beomi_gemma-mling-7b/2568a2b7-e95c-4224-9850-5816466b50f2.json deleted file mode 100644 index c9f2ce1b66b9b7ff25ebd4112911ea4eefc8ec62..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/beomi_gemma-mling-7b/2568a2b7-e95c-4224-9850-5816466b50f2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/beomi_gemma-mling-7b/1762652580.030431", - "retrieved_timestamp": "1762652580.030431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "beomi/gemma-mling-7b", - "developer": "google", - "inference_platform": "unknown", - "id": "beomi/gemma-mling-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20290939152559653 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40675941947154004 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37585416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2632978723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/bunnycore_Gemma-2-2B-Smart/ebada07f-e700-4f38-aec0-f801959969e6.json b/leaderboard_data/HFOpenLLMv2/google/bunnycore_Gemma-2-2B-Smart/ebada07f-e700-4f38-aec0-f801959969e6.json deleted file mode 100644 index 4fecca52892330602d448043e7e784f4b3164e5d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/bunnycore_Gemma-2-2B-Smart/ebada07f-e700-4f38-aec0-f801959969e6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Gemma-2-2B-Smart/1762652580.044707", - "retrieved_timestamp": "1762652580.044708", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Gemma-2-2B-Smart", - "developer": "google", - "inference_platform": "unknown", - "id": "bunnycore/Gemma-2-2B-Smart" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13206625088099574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39742674570492836 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4248541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2426030585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/bunnycore_Gemma2-9B-TitanFusion/95a2d032-e2a4-46df-84d2-6b7529d5bb01.json b/leaderboard_data/HFOpenLLMv2/google/bunnycore_Gemma2-9B-TitanFusion/95a2d032-e2a4-46df-84d2-6b7529d5bb01.json deleted file mode 100644 index 129ecba1a4eaf5694b866d7e397c889f10177381..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/bunnycore_Gemma2-9B-TitanFusion/95a2d032-e2a4-46df-84d2-6b7529d5bb01.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Gemma2-9B-TitanFusion/1762652580.044988", - "retrieved_timestamp": "1762652580.0449889", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Gemma2-9B-TitanFusion", - "developer": "google", - "inference_platform": "unknown", - "id": "bunnycore/Gemma2-9B-TitanFusion" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16184169115724056 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5712026020785131 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41362499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.39602726063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/cat-searcher_gemma-2-9b-it-sppo-iter-1-evol-1/af7a7129-1b6a-4ff5-952f-075ae4f7c137.json b/leaderboard_data/HFOpenLLMv2/google/cat-searcher_gemma-2-9b-it-sppo-iter-1-evol-1/af7a7129-1b6a-4ff5-952f-075ae4f7c137.json deleted file mode 100644 index 3bbd8aa8645fd8a122875e5c027bea557297247b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/cat-searcher_gemma-2-9b-it-sppo-iter-1-evol-1/af7a7129-1b6a-4ff5-952f-075ae4f7c137.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cat-searcher_gemma-2-9b-it-sppo-iter-1-evol-1/1762652580.099224", - "retrieved_timestamp": "1762652580.099225", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1", - "developer": "google", - "inference_platform": "unknown", - "id": "cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2941827683878775 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5939369622672414 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39257291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37998670212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/cat-searcher_gemma-2-9b-it-sppo-iter-1/3c33f6b0-dc40-4a61-bbbe-063b9d8d30e3.json b/leaderboard_data/HFOpenLLMv2/google/cat-searcher_gemma-2-9b-it-sppo-iter-1/3c33f6b0-dc40-4a61-bbbe-063b9d8d30e3.json deleted file mode 100644 index 
2e214dea82aa24bf49c94289f08fc85750f77769..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/cat-searcher_gemma-2-9b-it-sppo-iter-1/3c33f6b0-dc40-4a61-bbbe-063b9d8d30e3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cat-searcher_gemma-2-9b-it-sppo-iter-1/1762652580.091131", - "retrieved_timestamp": "1762652580.091137", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cat-searcher/gemma-2-9b-it-sppo-iter-1", - "developer": "google", - "inference_platform": "unknown", - "id": "cat-searcher/gemma-2-9b-it-sppo-iter-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30147674836101546 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5971867698707507 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39266666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38538896276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/cognitivecomputations_dolphin-2.9.4-gemma2-2b/29a10f53-dd38-437b-a7f3-9756035df640.json b/leaderboard_data/HFOpenLLMv2/google/cognitivecomputations_dolphin-2.9.4-gemma2-2b/29a10f53-dd38-437b-a7f3-9756035df640.json deleted file mode 100644 index cfe44da244de5fd1b56d294c7a0a355c37eb16f1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/cognitivecomputations_dolphin-2.9.4-gemma2-2b/29a10f53-dd38-437b-a7f3-9756035df640.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.4-gemma2-2b/1762652580.115823", - "retrieved_timestamp": "1762652580.115823", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.4-gemma2-2b", - "developer": "google", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.4-gemma2-2b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08955127949396491 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40813187411055213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2105219414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/djuna_Gemma-2-gemmama-9b/b2f24392-29aa-4a24-b489-87ea9b85daea.json b/leaderboard_data/HFOpenLLMv2/google/djuna_Gemma-2-gemmama-9b/b2f24392-29aa-4a24-b489-87ea9b85daea.json deleted file mode 100644 index aba38c4451e4db5a839c2654591ee9b143206be6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/djuna_Gemma-2-gemmama-9b/b2f24392-29aa-4a24-b489-87ea9b85daea.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/djuna_Gemma-2-gemmama-9b/1762652580.12782", - "retrieved_timestamp": "1762652580.127821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "djuna/Gemma-2-gemmama-9b", - "developer": "google", - "inference_platform": "unknown", - "id": "djuna/Gemma-2-gemmama-9b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.7703404743857409 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5420037856495951 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4031458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3109208776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/dwikitheduck_gemma-2-2b-id-inst/6d66b056-c83d-49b8-ac84-04396c0d97df.json b/leaderboard_data/HFOpenLLMv2/google/dwikitheduck_gemma-2-2b-id-inst/6d66b056-c83d-49b8-ac84-04396c0d97df.json deleted file mode 100644 index 8dd0d9b812ea29eb44f410b318248dab7a84e854..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/dwikitheduck_gemma-2-2b-id-inst/6d66b056-c83d-49b8-ac84-04396c0d97df.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dwikitheduck_gemma-2-2b-id-inst/1762652580.137194", - "retrieved_timestamp": "1762652580.137195", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dwikitheduck/gemma-2-2b-id-inst", - "developer": "google", - "inference_platform": "unknown", - "id": "dwikitheduck/gemma-2-2b-id-inst" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38785644312646006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39621721241423097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41542708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21733710106382978 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/dwikitheduck_gemma-2-2b-id/000b7f0b-9e2f-499a-9bab-b08767efb8ca.json b/leaderboard_data/HFOpenLLMv2/google/dwikitheduck_gemma-2-2b-id/000b7f0b-9e2f-499a-9bab-b08767efb8ca.json deleted file mode 100644 index 318d55a33af1a617bdd97a4891ba3a0ee3e2d5d1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/dwikitheduck_gemma-2-2b-id/000b7f0b-9e2f-499a-9bab-b08767efb8ca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dwikitheduck_gemma-2-2b-id/1762652580.136933", - "retrieved_timestamp": "1762652580.136933", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dwikitheduck/gemma-2-2b-id", - "developer": "google", - "inference_platform": "unknown", - "id": "dwikitheduck/gemma-2-2b-id" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38785644312646006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39621721241423097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41542708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21733710106382978 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/ehristoforu_Gemma2-9B-it-psy10k-mental_health/25c93024-ce65-49d5-96da-00107bb37f77.json b/leaderboard_data/HFOpenLLMv2/google/ehristoforu_Gemma2-9B-it-psy10k-mental_health/25c93024-ce65-49d5-96da-00107bb37f77.json deleted file mode 100644 index f0b08fea4baccbaf7c40534659bc36a7ecccec2f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/ehristoforu_Gemma2-9B-it-psy10k-mental_health/25c93024-ce65-49d5-96da-00107bb37f77.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_Gemma2-9B-it-psy10k-mental_health/1762652580.139083", - "retrieved_timestamp": "1762652580.139084", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/Gemma2-9B-it-psy10k-mental_health", - "developer": "google", - "inference_platform": "unknown", - "id": "ehristoforu/Gemma2-9B-it-psy10k-mental_health" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5886658510529839 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5539376944027642 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16314199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40860416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38289561170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/ehristoforu_Gemma2-9b-it-train6/e289e629-17dd-440e-8839-d5dcbe535fd6.json 
b/leaderboard_data/HFOpenLLMv2/google/ehristoforu_Gemma2-9b-it-train6/e289e629-17dd-440e-8839-d5dcbe535fd6.json deleted file mode 100644 index f85136e91c1c6fa5331de705f30ee576848d6e7c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/ehristoforu_Gemma2-9b-it-train6/e289e629-17dd-440e-8839-d5dcbe535fd6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_Gemma2-9b-it-train6/1762652580.1393359", - "retrieved_timestamp": "1762652580.139337", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/Gemma2-9b-it-train6", - "developer": "google", - "inference_platform": "unknown", - "id": "ehristoforu/Gemma2-9b-it-train6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7025215317579578 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5898092579133603 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19108761329305135 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40841666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39419880319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/ell44ot_gemma-2b-def/9ba31c7b-13df-46f2-a164-1729563707e1.json b/leaderboard_data/HFOpenLLMv2/google/ell44ot_gemma-2b-def/9ba31c7b-13df-46f2-a164-1729563707e1.json deleted file mode 100644 index 052cbf5e48eededa34e9617593af858232c25e48..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/ell44ot_gemma-2b-def/9ba31c7b-13df-46f2-a164-1729563707e1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ell44ot_gemma-2b-def/1762652580.147274", - "retrieved_timestamp": "1762652580.147275", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" 
- ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ell44ot/gemma-2b-def", - "developer": "google", - "inference_platform": "unknown", - "id": "ell44ot/gemma-2b-def" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26930433472076315 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31586532094752634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36702083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15724734042553193 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GemmaModel", - "params_billions": 1.546 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_codegemma-1.1-2b/9d92e421-c458-4ad3-b9bf-45c0ca1b90cf.json b/leaderboard_data/HFOpenLLMv2/google/google_codegemma-1.1-2b/9d92e421-c458-4ad3-b9bf-45c0ca1b90cf.json deleted file mode 100644 index c113455c95ea872f7cbcf7b48193ba42219a5511..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_codegemma-1.1-2b/9d92e421-c458-4ad3-b9bf-45c0ca1b90cf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_codegemma-1.1-2b/1762652580.172607", - "retrieved_timestamp": "1762652580.172608", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/codegemma-1.1-2b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/codegemma-1.1-2b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.22936253584932426 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3353417790248454 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1278257978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-base/69eb63bf-72dd-4995-a8ec-49fd304a8ee7.json b/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-base/69eb63bf-72dd-4995-a8ec-49fd304a8ee7.json deleted file mode 100644 index 2abcd5be26f59618dce3ee3096ad3be791fd2130..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-base/69eb63bf-72dd-4995-a8ec-49fd304a8ee7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_flan-t5-base/1762652580.172907", - "retrieved_timestamp": "1762652580.172908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/flan-t5-base", - "developer": "google", - "inference_platform": "unknown", - "id": "google/flan-t5-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18907055501624578 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3525980599300322 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23825503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36711458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13572140957446807 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 0.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-large/eb2e1202-9292-4f5e-a366-abc84897c66d.json b/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-large/eb2e1202-9292-4f5e-a366-abc84897c66d.json deleted file mode 100644 index a2ef83dd40f7d0595b036f5c66f6a54b564bbdf2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-large/eb2e1202-9292-4f5e-a366-abc84897c66d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_flan-t5-large/1762652580.173132", - "retrieved_timestamp": "1762652580.1731331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/flan-t5-large", - "developer": "google", - "inference_platform": "unknown", - "id": "google/flan-t5-large" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22009490374428736 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41531150356794316 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40832291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.17087765957446807 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 0.783 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-small/368a36c5-8211-4240-ac88-3fd5e5414310.json b/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-small/368a36c5-8211-4240-ac88-3fd5e5414310.json deleted file mode 100644 index 1158cb78acf4d9d6f96ce8db75456dbcf5d4cdef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-small/368a36c5-8211-4240-ac88-3fd5e5414310.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_flan-t5-small/1762652580.173366", - "retrieved_timestamp": "1762652580.173366", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/flan-t5-small", - "developer": "google", - "inference_platform": "unknown", - "id": "google/flan-t5-small" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1524255641697363 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3282901097640842 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41229166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1233377659574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 0.077 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xl/98a6a294-7b5d-4279-8aa6-6ed16248ce0b.json b/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xl/98a6a294-7b5d-4279-8aa6-6ed16248ce0b.json deleted file mode 100644 index 27e3e4c1542c604ba3d58f8c19892de5deb9e5f1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xl/98a6a294-7b5d-4279-8aa6-6ed16248ce0b.json +++ /dev/null @@ -1,107 +0,0 @@ 
-{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_flan-t5-xl/1762652580.1738272", - "retrieved_timestamp": "1762652580.1738281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/flan-t5-xl", - "developer": "google", - "inference_platform": "unknown", - "id": "google/flan-t5-xl" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2206944241279804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45372172155693963 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634442 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21417885638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 2.85 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xl/ab0ac321-1c2b-4523-b48c-de47ff06e7a3.json b/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xl/ab0ac321-1c2b-4523-b48c-de47ff06e7a3.json deleted file mode 100644 index caaad62bc31a2a7fbfc61ad911ae8fd8e7878d6c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xl/ab0ac321-1c2b-4523-b48c-de47ff06e7a3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_flan-t5-xl/1762652580.173602", - "retrieved_timestamp": "1762652580.173603", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/flan-t5-xl", - "developer": "google", - "inference_platform": "unknown", - "id": "google/flan-t5-xl" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22374189373085634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45310636062112314 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41809375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21467752659574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 2.85 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xxl/e15f4783-510e-4b92-a999-072caa425d4c.json b/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xxl/e15f4783-510e-4b92-a999-072caa425d4c.json deleted file mode 100644 index 2476c4752a184ff97482e71b496f90a91d09cafa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xxl/e15f4783-510e-4b92-a999-072caa425d4c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_flan-t5-xxl/1762652580.174026", - "retrieved_timestamp": "1762652580.174026", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/flan-t5-xxl", - "developer": "google", - "inference_platform": "unknown", - "id": "google/flan-t5-xxl" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2200450360598767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5065888015776924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42175 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23429188829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 11.267 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_flan-ul2/99941572-3e23-467c-97df-dfe1a2aa9805.json b/leaderboard_data/HFOpenLLMv2/google/google_flan-ul2/99941572-3e23-467c-97df-dfe1a2aa9805.json deleted file mode 100644 index 3323a8057555485144bb26ba7ba709179daddab5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_flan-ul2/99941572-3e23-467c-97df-dfe1a2aa9805.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_flan-ul2/1762652580.174251", - "retrieved_timestamp": "1762652580.174251", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/flan-ul2", - "developer": "google", - "inference_platform": "unknown", - "id": "google/flan-ul2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23925406809487715 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5053738049125648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3843541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24933510638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 19.46 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-1.1-2b-it/5ed676b6-4aff-4d71-a91a-6d5d9feeb28f.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-1.1-2b-it/5ed676b6-4aff-4d71-a91a-6d5d9feeb28f.json deleted file mode 100644 index dc3f1c19d05f60e3f51c47a33b2d7ffaa51fc76b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-1.1-2b-it/5ed676b6-4aff-4d71-a91a-6d5d9feeb28f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_gemma-1.1-2b-it/1762652580.1745641", - "retrieved_timestamp": "1762652580.174565", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemma-1.1-2b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-1.1-2b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30674831668860847 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3184634974814922 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33939583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14835438829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-1.1-7b-it/6929c338-76a5-4386-9fa8-68e35a989a86.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-1.1-7b-it/6929c338-76a5-4386-9fa8-68e35a989a86.json deleted file mode 100644 index 
90fa3ab088f41f684fa38928df55d490f9e5b2f6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-1.1-7b-it/6929c338-76a5-4386-9fa8-68e35a989a86.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_gemma-1.1-7b-it/1762652580.1748302", - "retrieved_timestamp": "1762652580.1748302", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemma-1.1-7b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-1.1-7b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5039107346285633 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3935297962833251 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42302083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2583942819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-27b-it/5bcf96ce-efd1-4f90-91a1-edd548de71ad.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-27b-it/5bcf96ce-efd1-4f90-91a1-edd548de71ad.json deleted file mode 100644 index f4a1a2921ff0d51fdc6dcd6d90050a20adc91545..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-27b-it/5bcf96ce-efd1-4f90-91a1-edd548de71ad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_gemma-2-27b-it/1762652580.17537", - "retrieved_timestamp": "1762652580.175371", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemma-2-27b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-27b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7977677008116243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6451387433168799 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23867069486404835 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40330208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4451462765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-27b/12f7d5a6-3f8b-49d8-9ca8-38774dbcca92.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-27b/12f7d5a6-3f8b-49d8-9ca8-38774dbcca92.json deleted file mode 100644 index 59f8f5644a42216bc4b12c5ef5f0cb1aacb32e17..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-27b/12f7d5a6-3f8b-49d8-9ca8-38774dbcca92.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_gemma-2-27b/1762652580.175144", - "retrieved_timestamp": "1762652580.175145", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemma-2-27b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-27b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24752213017017072 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5642908317482057 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43963541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4370844414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-it/64daa9ea-cf1e-4787-90cf-ed72c5e23afd.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-it/64daa9ea-cf1e-4787-90cf-ed72c5e23afd.json deleted file mode 100644 index c20168b194fb8939305662925db9c5544027297d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-it/64daa9ea-cf1e-4787-90cf-ed72c5e23afd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_gemma-2-2b-it/1762652580.176172", - "retrieved_timestamp": "1762652580.176194", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemma-2-2b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-2b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5668337788179807 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41992308914274706 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39288541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25498670212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-jpn-it/251b93fa-6f12-41bc-85c8-ded52e1a0d2d.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-jpn-it/251b93fa-6f12-41bc-85c8-ded52e1a0d2d.json deleted file mode 100644 index 26f68cc2e61bb49683c5f4e3389fdf740003414f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-jpn-it/251b93fa-6f12-41bc-85c8-ded52e1a0d2d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_gemma-2-2b-jpn-it/1762652580.1767948", - "retrieved_timestamp": "1762652580.176796", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemma-2-2b-jpn-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-2b-jpn-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5288401441508531 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4178440226217119 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37276041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2466755319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at 
end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-jpn-it/a09fdbce-489c-4d14-a05f-7663121bece7.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-jpn-it/a09fdbce-489c-4d14-a05f-7663121bece7.json deleted file mode 100644 index de5c8169fdb1de2dc2c0995c623212bb227fa8b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-jpn-it/a09fdbce-489c-4d14-a05f-7663121bece7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_gemma-2-2b-jpn-it/1762652580.176506", - "retrieved_timestamp": "1762652580.176507", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemma-2-2b-jpn-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-2b-jpn-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5077826832803628 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42255698900658106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39638541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2578125 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b/07e74f27-e0c3-448f-9a8c-a07ff8a73178.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b/07e74f27-e0c3-448f-9a8c-a07ff8a73178.json deleted file mode 100644 index cf64654349e6196e5ef0d75a1b4cd96c0050d72d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b/07e74f27-e0c3-448f-9a8c-a07ff8a73178.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_gemma-2-2b/1762652580.175597", - "retrieved_timestamp": "1762652580.1755981", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemma-2-2b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-2b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19931226922343825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3655966996422591 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4231770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21800199468085107 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b/53fb75b1-2d9f-4af3-a358-18bf5d4a9032.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b/53fb75b1-2d9f-4af3-a358-18bf5d4a9032.json deleted file mode 100644 index e9df8fe5573ac5432d86afa4e10c5d43ce8064c7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b/53fb75b1-2d9f-4af3-a358-18bf5d4a9032.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_gemma-2-2b/1762652580.1759539", - "retrieved_timestamp": "1762652580.175955", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemma-2-2b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-2b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.20176021844262113 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3708674612470255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.421875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22165890957446807 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-9b-it/e8cef406-d6cc-48bd-872f-3d5b74bcf092.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-9b-it/e8cef406-d6cc-48bd-872f-3d5b74bcf092.json deleted file mode 100644 index 9a14c1bcdf976d8afd9504ee0f1d4d1174669f8f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-9b-it/e8cef406-d6cc-48bd-872f-3d5b74bcf092.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_gemma-2-9b-it/1762652580.177257", - "retrieved_timestamp": "1762652580.177258", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemma-2-9b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-9b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7435626360279614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5990342504164132 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36073825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4072708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875498670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-9b/2ac50111-a850-4bd2-8136-c373990742a5.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-9b/2ac50111-a850-4bd2-8136-c373990742a5.json deleted file mode 100644 index a1995120574f0cdad7a8b58fb5dcc9275be2fd32..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-9b/2ac50111-a850-4bd2-8136-c373990742a5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_gemma-2-9b/1762652580.177011", - "retrieved_timestamp": "1762652580.177012", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemma-2-9b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2-9b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20398320899657355 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5377373397621884 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4461145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4103224734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2b-it/50dffd1a-ddf5-40fd-a2c8-e5dd140af617.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2b-it/50dffd1a-ddf5-40fd-a2c8-e5dd140af617.json deleted file mode 100644 index 6e49827d85f6c3517e86a5115e43d937a989da10..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2b-it/50dffd1a-ddf5-40fd-a2c8-e5dd140af617.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_gemma-2b-it/1762652580.17777", - "retrieved_timestamp": "1762652580.17777", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemma-2b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26902950837112194 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31508191988788464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13530585106382978 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2b/2dd86ebc-0253-4801-ac99-2bb3494ad29b.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2b/2dd86ebc-0253-4801-ac99-2bb3494ad29b.json deleted file mode 100644 index 17adafbabf30d1eaf784ee71bfc1e2ea135748b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2b/2dd86ebc-0253-4801-ac99-2bb3494ad29b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/google_gemma-2b/1762652580.177512", - "retrieved_timestamp": "1762652580.177513", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemma-2b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-2b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20375825033134307 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33656381705857935 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39778125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13655252659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-7b-it/30146048-ee0f-431d-b3e7-8c066c820740.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-7b-it/30146048-ee0f-431d-b3e7-8c066c820740.json deleted file mode 100644 index e18ad115d9c410f3546711ceaa8e58904002a7e1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-7b-it/30146048-ee0f-431d-b3e7-8c066c820740.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_gemma-7b-it/1762652580.178242", - "retrieved_timestamp": "1762652580.1782432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemma-7b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-7b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868324933398937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36459012743300967 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42742708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16946476063829788 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-7b/630e3cc0-fccc-41b3-b439-85a875dae401.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-7b/630e3cc0-fccc-41b3-b439-85a875dae401.json deleted file mode 100644 index 7b466c7d23986a94a4e8bf4849f5595f9008ee12..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-7b/630e3cc0-fccc-41b3-b439-85a875dae401.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_gemma-7b/1762652580.1780128", - "retrieved_timestamp": "1762652580.178014", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemma-7b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/gemma-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2659321710838353 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43615285239286355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4062395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2947972074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GemmaForCausalLM", - "params_billions": 8.538 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_mt5-base/621fb00c-90a0-4295-9bd6-f5e102bc0bab.json b/leaderboard_data/HFOpenLLMv2/google/google_mt5-base/621fb00c-90a0-4295-9bd6-f5e102bc0bab.json deleted file mode 100644 index 7a1044fc2409bd14ad62069d9d613c587619737c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_mt5-base/621fb00c-90a0-4295-9bd6-f5e102bc0bab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_mt5-base/1762652580.178463", - "retrieved_timestamp": "1762652580.178463", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/mt5-base", - "developer": "google", - "inference_platform": "unknown", - "id": "google/mt5-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1645157072124186 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28831600228488835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36720833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy 
on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10696476063829788 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MT5ForConditionalGeneration", - "params_billions": 0.39 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_mt5-small/0d958c7c-5cd9-459f-a0e9-235b5d41ae53.json b/leaderboard_data/HFOpenLLMv2/google/google_mt5-small/0d958c7c-5cd9-459f-a0e9-235b5d41ae53.json deleted file mode 100644 index 0bf4ff92aadcb0cb95f835ed32a950288c6bed59..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_mt5-small/0d958c7c-5cd9-459f-a0e9-235b5d41ae53.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_mt5-small/1762652580.1787279", - "retrieved_timestamp": "1762652580.178729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/mt5-small", - "developer": "google", - "inference_platform": "unknown", - "id": "google/mt5-small" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17180968718555653 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2765842029929075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MT5ForConditionalGeneration", - "params_billions": 0.17 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_mt5-xl/5abb3ce9-6ad4-4dfa-8bca-81ec6cb84426.json b/leaderboard_data/HFOpenLLMv2/google/google_mt5-xl/5abb3ce9-6ad4-4dfa-8bca-81ec6cb84426.json deleted file mode 100644 index d9ac4c2e5b0ff5961f8a3480e5b004b7276bdabc..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/google/google_mt5-xl/5abb3ce9-6ad4-4dfa-8bca-81ec6cb84426.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_mt5-xl/1762652580.17897", - "retrieved_timestamp": "1762652580.1789708", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/mt5-xl", - "developer": "google", - "inference_platform": "unknown", - "id": "google/mt5-xl" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19596448534333347 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.304735837080435 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3795208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11195146276595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MT5ForConditionalGeneration", - "params_billions": 3.23 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_mt5-xxl/38520cce-b3b6-4f22-a6a8-313f6181f5ea.json b/leaderboard_data/HFOpenLLMv2/google/google_mt5-xxl/38520cce-b3b6-4f22-a6a8-313f6181f5ea.json deleted file mode 100644 index ddbda16ffb7f6121aa4f53dbbde5ecb70f51b7de..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_mt5-xxl/38520cce-b3b6-4f22-a6a8-313f6181f5ea.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_mt5-xxl/1762652580.1791801", - "retrieved_timestamp": "1762652580.1791801", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/mt5-xxl", - "developer": "google", - 
"inference_platform": "unknown", - "id": "google/mt5-xxl" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23575668116154028 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2959344159116905 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36894791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10887632978723404 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "T5ForConditionalGeneration", - "params_billions": 11.9 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-2b-it/a219b160-3dbd-4dcd-b39d-d12c6f9b1145.json b/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-2b-it/a219b160-3dbd-4dcd-b39d-d12c6f9b1145.json deleted file mode 100644 index 78e1bce35d6df076b161596d0e6e5c0e43fbff4d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-2b-it/a219b160-3dbd-4dcd-b39d-d12c6f9b1145.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_recurrentgemma-2b-it/1762652580.17961", - "retrieved_timestamp": "1762652580.179611", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/recurrentgemma-2b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/recurrentgemma-2b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2949329999955673 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33300047272606553 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3340625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1402094414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "RecurrentGemmaForCausalLM", - "params_billions": 2.683 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-2b/218a5d0f-5242-43c4-8166-81f5c09626bb.json b/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-2b/218a5d0f-5242-43c4-8166-81f5c09626bb.json deleted file mode 100644 index c1103dbb40d4b035331422ce6fec37101dff32f4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-2b/218a5d0f-5242-43c4-8166-81f5c09626bb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_recurrentgemma-2b/1762652580.179393", - "retrieved_timestamp": "1762652580.179394", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/recurrentgemma-2b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/recurrentgemma-2b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3017028151970106 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31973582830084474 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3445729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11760305851063829 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "RecurrentGemmaForCausalLM", - "params_billions": 2.683 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-9b-it/c7095b76-2d50-467b-a8d9-d7a277f1f14c.json b/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-9b-it/c7095b76-2d50-467b-a8d9-d7a277f1f14c.json deleted file mode 100644 index 7bb86b9ece05231a8100bbee02e824908982e0d8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-9b-it/c7095b76-2d50-467b-a8d9-d7a277f1f14c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_recurrentgemma-9b-it/1762652580.180049", - "retrieved_timestamp": "1762652580.18005", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/recurrentgemma-9b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "google/recurrentgemma-9b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5010383560065071 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4367189649027647 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43790625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2843251329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "RecurrentGemmaForCausalLM", - "params_billions": 9.0 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-9b/1ff3ab95-3007-4cbf-a146-5e8e4ae65404.json b/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-9b/1ff3ab95-3007-4cbf-a146-5e8e4ae65404.json deleted file mode 100644 index 4218dda52eaadbbfae66eb7c25beb6db98d2d08a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-9b/1ff3ab95-3007-4cbf-a146-5e8e4ae65404.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_recurrentgemma-9b/1762652580.17984", - "retrieved_timestamp": "1762652580.179841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/recurrentgemma-9b", - "developer": "google", - "inference_platform": "unknown", - "id": "google/recurrentgemma-9b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31159434744256354 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39562568669428394 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3802604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2604720744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "RecurrentGemmaForCausalLM", - "params_billions": 9.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_switch-base-8/43e22ce0-cdd7-424f-8a01-f9fea8b2a010.json b/leaderboard_data/HFOpenLLMv2/google/google_switch-base-8/43e22ce0-cdd7-424f-8a01-f9fea8b2a010.json deleted file mode 100644 index 2b4e27390ec85d9ac714f969ae8a3d0f8f9e825e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_switch-base-8/43e22ce0-cdd7-424f-8a01-f9fea8b2a010.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_switch-base-8/1762652580.180255", - "retrieved_timestamp": "1762652580.180256", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/switch-base-8", - "developer": "google", - "inference_platform": "unknown", - "id": "google/switch-base-8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15852050337548815 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28763132730669333 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35173958333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10979055851063829 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "SwitchTransformersForConditionalGeneration", - "params_billions": 0.62 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/google_umt5-base/659053b0-7694-41e7-916d-28406b3ed572.json b/leaderboard_data/HFOpenLLMv2/google/google_umt5-base/659053b0-7694-41e7-916d-28406b3ed572.json deleted file mode 100644 index 0dffb525d0d75b9dd67805678287dcdafe5c2e80..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/google_umt5-base/659053b0-7694-41e7-916d-28406b3ed572.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/google_umt5-base/1762652580.180466", - "retrieved_timestamp": "1762652580.180467", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/umt5-base", - "developer": "google", - "inference_platform": "unknown", - "id": "google/umt5-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.174632198123202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27877262328945457 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33821875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10779587765957446 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "UMT5ForConditionalGeneration", - "params_billions": -1.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v1-gemma2-9b-it/57072a5e-1f64-4ae2-9e2c-caecc1dc05f4.json b/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v1-gemma2-9b-it/57072a5e-1f64-4ae2-9e2c-caecc1dc05f4.json deleted file mode 100644 index 46ab1077cd8d623cfaa03be1874b06293517d4b3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v1-gemma2-9b-it/57072a5e-1f64-4ae2-9e2c-caecc1dc05f4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Gigantes-v1-gemma2-9b-it/1762652580.1819131", - "retrieved_timestamp": "1762652580.1819131", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Gigantes-v1-gemma2-9b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Gigantes-v1-gemma2-9b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.692454908531585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.597792552822268 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45547916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42253989361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v2-gemma2-9b-it/47486923-2194-4b8e-930c-ca14bd5f8a26.json b/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v2-gemma2-9b-it/47486923-2194-4b8e-930c-ca14bd5f8a26.json deleted file mode 100644 index 81c4205cb13d6a522a9b016402172614e425b285..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v2-gemma2-9b-it/47486923-2194-4b8e-930c-ca14bd5f8a26.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Gigantes-v2-gemma2-9b-it/1762652580.182155", - "retrieved_timestamp": "1762652580.182156", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Gigantes-v2-gemma2-9b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Gigantes-v2-gemma2-9b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7350696152874374 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5986559388303995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45947916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4259474734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v3-gemma2-9b-it/bb063d7a-65fa-416b-88e9-7bacdef1da3e.json b/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v3-gemma2-9b-it/bb063d7a-65fa-416b-88e9-7bacdef1da3e.json deleted file mode 100644 index c4a70ca160cc43fc8de5d139e42eb970a51514ea..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v3-gemma2-9b-it/bb063d7a-65fa-416b-88e9-7bacdef1da3e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Gigantes-v3-gemma2-9b-it/1762652580.182362", - "retrieved_timestamp": "1762652580.1823628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Gigantes-v3-gemma2-9b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Gigantes-v3-gemma2-9b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.697625633319592 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5983513792324827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20996978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4608125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4226230053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v1-Gemma2-8k-9B/2cf17692-b105-41df-9783-6c7728ab778f.json 
b/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v1-Gemma2-8k-9B/2cf17692-b105-41df-9783-6c7728ab778f.json deleted file mode 100644 index a1751ce93320e6f60ee25614ca7aefe49be81e1f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v1-Gemma2-8k-9B/2cf17692-b105-41df-9783-6c7728ab778f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v1-Gemma2-8k-9B/1762652580.1841059", - "retrieved_timestamp": "1762652580.1841059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Magnolia-v1-Gemma2-8k-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Magnolia-v1-Gemma2-8k-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35308536904302806 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5589031767575711 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16842900302114805 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46446875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4242021276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v2-Gemma2-8k-9B/4d0574f4-4d91-4395-afff-133216eee509.json b/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v2-Gemma2-8k-9B/4d0574f4-4d91-4395-afff-133216eee509.json deleted file mode 100644 index 86d80bc0f4d9e8e9f4416b136584010bd570bb84..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v2-Gemma2-8k-9B/4d0574f4-4d91-4395-afff-133216eee509.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v2-Gemma2-8k-9B/1762652580.184566", - "retrieved_timestamp": "1762652580.184567", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Magnolia-v2-Gemma2-8k-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Magnolia-v2-Gemma2-8k-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7384417789243651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6015773428405322 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2280966767371601 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44884375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4331781914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v3-Gemma2-8k-9B/8fff2cec-a733-4505-bce9-8b605044181a.json b/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v3-Gemma2-8k-9B/8fff2cec-a733-4505-bce9-8b605044181a.json deleted file mode 100644 index 221bfc0806afe9ea32f6d3eb82bc32099cd0ea67..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v3-Gemma2-8k-9B/8fff2cec-a733-4505-bce9-8b605044181a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v3-Gemma2-8k-9B/1762652580.1850398", - "retrieved_timestamp": "1762652580.185041", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Magnolia-v3-Gemma2-8k-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Magnolia-v3-Gemma2-8k-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7378422585406721 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6015406636327695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23187311178247735 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4488125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43367686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magot-v1-Gemma2-8k-9B/9e63ff64-f862-40ad-b594-31063ec0d31e.json b/leaderboard_data/HFOpenLLMv2/google/grimjim_Magot-v1-Gemma2-8k-9B/9e63ff64-f862-40ad-b594-31063ec0d31e.json deleted file mode 100644 index f4421a1c9147a815177bb35ae0d3e5c8930aca37..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magot-v1-Gemma2-8k-9B/9e63ff64-f862-40ad-b594-31063ec0d31e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Magot-v1-Gemma2-8k-9B/1762652580.185666", - "retrieved_timestamp": "1762652580.185667", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Magot-v1-Gemma2-8k-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Magot-v1-Gemma2-8k-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29967818720993633 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6019447732218105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44884375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43367686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magot-v2-Gemma2-8k-9B/2d250aa8-f3c5-4f9f-9e5c-dde8f720db53.json b/leaderboard_data/HFOpenLLMv2/google/grimjim_Magot-v2-Gemma2-8k-9B/2d250aa8-f3c5-4f9f-9e5c-dde8f720db53.json deleted file mode 100644 index c4f29f48b80d3bbdd4f327b0047ff47eddb93ee0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magot-v2-Gemma2-8k-9B/2d250aa8-f3c5-4f9f-9e5c-dde8f720db53.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Magot-v2-Gemma2-8k-9B/1762652580.185882", - "retrieved_timestamp": "1762652580.1858828", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Magot-v2-Gemma2-8k-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "grimjim/Magot-v2-Gemma2-8k-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7347449212533854 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5896713649821103 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4343958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4222905585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2Crono-27B/501e2a2c-e32c-455e-8e5f-f8bde053fddc.json b/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2Crono-27B/501e2a2c-e32c-455e-8e5f-f8bde053fddc.json deleted file mode 100644 index beb48a038c142caded91df9fe94d4c603f0d399f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2Crono-27B/501e2a2c-e32c-455e-8e5f-f8bde053fddc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2Crono-27B/1762652580.193866", - "retrieved_timestamp": "1762652580.193866", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/Gemma2Crono-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "hotmailuser/Gemma2Crono-27B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7086164709637096 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6505341690680219 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37080536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45668749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4632646276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2SimPO-27B/433a8abf-8ff7-40bb-a4d0-654efdb6bf86.json 
b/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2SimPO-27B/433a8abf-8ff7-40bb-a4d0-654efdb6bf86.json deleted file mode 100644 index 9d2f8db9249aae931546e52df89a2d2b7575c978..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2SimPO-27B/433a8abf-8ff7-40bb-a4d0-654efdb6bf86.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2SimPO-27B/1762652580.194106", - "retrieved_timestamp": "1762652580.1941068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/Gemma2SimPO-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "hotmailuser/Gemma2SimPO-27B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7222303488078299 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6413158976157102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28172205438066467 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44465625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46417885638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2atlas-27B/c9020f27-9175-4f12-a108-6cbff1c0cb22.json b/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2atlas-27B/c9020f27-9175-4f12-a108-6cbff1c0cb22.json deleted file mode 100644 index effa3723a98b85e5c47f42829abab8acda2d044d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2atlas-27B/c9020f27-9175-4f12-a108-6cbff1c0cb22.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2atlas-27B/1762652580.1943119", - "retrieved_timestamp": "1762652580.194313", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/Gemma2atlas-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "hotmailuser/Gemma2atlas-27B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7213560020744957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6544960921220462 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44453125000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4749833776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2magnum-27b/0ad192a1-b33f-4362-a21d-ccc590986c5c.json b/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2magnum-27b/0ad192a1-b33f-4362-a21d-ccc590986c5c.json deleted file mode 100644 index ec055e1f18620caa755773c68573ce461e870021..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2magnum-27b/0ad192a1-b33f-4362-a21d-ccc590986c5c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2magnum-27b/1762652580.1945128", - "retrieved_timestamp": "1762652580.194514", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/Gemma2magnum-27b", - "developer": "google", - "inference_platform": "unknown", - "id": "hotmailuser/Gemma2magnum-27b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5050599077115387 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6199590493843724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47234375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45960771276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/ifable_gemma-2-Ifable-9B/42b3b64b-0e15-4a49-b542-da27ab7e2143.json b/leaderboard_data/HFOpenLLMv2/google/ifable_gemma-2-Ifable-9B/42b3b64b-0e15-4a49-b542-da27ab7e2143.json deleted file mode 100644 index 2d47c7cc2f25ab02060dd5dfed016843aaed28d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/ifable_gemma-2-Ifable-9B/42b3b64b-0e15-4a49-b542-da27ab7e2143.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ifable_gemma-2-Ifable-9B/1762652580.225604", - "retrieved_timestamp": "1762652580.225605", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ifable/gemma-2-Ifable-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "ifable/gemma-2-Ifable-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2984292787581395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5866115556693244 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40525000000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4226230053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/jebish7_gemma-2-2b-it/86206a02-3ab9-4a86-a00c-2900e8cd2e18.json b/leaderboard_data/HFOpenLLMv2/google/jebish7_gemma-2-2b-it/86206a02-3ab9-4a86-a00c-2900e8cd2e18.json deleted file mode 100644 index 90a53be85ddaf1beff8680910444af629cf7cd98..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/jebish7_gemma-2-2b-it/86206a02-3ab9-4a86-a00c-2900e8cd2e18.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jebish7_gemma-2-2b-it/1762652580.2824588", - "retrieved_timestamp": "1762652580.2824588", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jebish7/gemma-2-2b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "jebish7/gemma-2-2b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12717035244263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43951564907099594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42444791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27152593085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/jebish7_gemma-2-9b-it/80a35d79-893b-439f-b100-a538a3c86974.json b/leaderboard_data/HFOpenLLMv2/google/jebish7_gemma-2-9b-it/80a35d79-893b-439f-b100-a538a3c86974.json deleted file mode 100644 index 8a9b186c590b94a0e016273a40b66988cf9c7880..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/jebish7_gemma-2-9b-it/80a35d79-893b-439f-b100-a538a3c86974.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jebish7_gemma-2-9b-it/1762652580.282719", - "retrieved_timestamp": "1762652580.28272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jebish7/gemma-2-9b-it", - "developer": "google", - "inference_platform": "unknown", - "id": "jebish7/gemma-2-9b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1557467519514887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5949210568047724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4554479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.414311835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-9B/9ba72d50-4321-4383-8be9-286a56607624.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-9B/9ba72d50-4321-4383-8be9-286a56607624.json deleted file mode 100644 index 
bd5c56febbbb82a653ef9907c2ae6cb8da834a62..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-9B/9ba72d50-4321-4383-8be9-286a56607624.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-9B/1762652580.31483", - "retrieved_timestamp": "1762652580.314831", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3008772279773224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5931298417725773 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4424270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4226230053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-Advanced-9B/7806d1aa-b9e2-45bc-b89d-76e6c48dd3a0.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-Advanced-9B/7806d1aa-b9e2-45bc-b89d-76e6c48dd3a0.json deleted file mode 100644 index b77ef0acc3e52b24d48b298eca7af62428084514..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-Advanced-9B/7806d1aa-b9e2-45bc-b89d-76e6c48dd3a0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-Advanced-9B/1762652580.315091", - "retrieved_timestamp": "1762652580.315092", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-Advanced-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-Advanced-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5515964308036011 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5889067263184956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19788519637462235 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3760729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4243683510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-Remix-9B/29dfbb00-8760-46d8-bef8-d036870fb0c0.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-Remix-9B/29dfbb00-8760-46d8-bef8-d036870fb0c0.json deleted file mode 100644 index 0e39f1369e6d2eee858a9746c99aa62313eb2b90..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-Remix-9B/29dfbb00-8760-46d8-bef8-d036870fb0c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-Remix-9B/1762652580.31531", - "retrieved_timestamp": "1762652580.3153112", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-Remix-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-Remix-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.7083416446140685 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5892021015046846 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42386968085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2-9B/ca1b9625-0112-4ebf-b1c3-d2dd217d50b2.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2-9B/ca1b9625-0112-4ebf-b1c3-d2dd217d50b2.json deleted file mode 100644 index 07a4545a4bbcd687ab559bedcc24c3e1ef101fb3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2-9B/ca1b9625-0112-4ebf-b1c3-d2dd217d50b2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v2-9B/1762652580.315539", - "retrieved_timestamp": "1762652580.31554", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21362429464930827 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5765835815625312 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34838541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.422124335106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2a-9B/4fa1e172-f570-4a96-b53a-8ecf31854191.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2a-9B/4fa1e172-f570-4a96-b53a-8ecf31854191.json deleted file mode 100644 index cd1dd636aea0d20959029909b7dedcae0fdd93ee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2a-9B/4fa1e172-f570-4a96-b53a-8ecf31854191.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v2a-9B/1762652580.315754", - "retrieved_timestamp": "1762652580.315755", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v2a-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v2a-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15946909755005606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.518248966271832 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31647916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35147938829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2f-9B/fd59fb1c-3681-44d2-9172-b10891ae9c55.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2f-9B/fd59fb1c-3681-44d2-9172-b10891ae9c55.json deleted file mode 100644 index 1323909e036d3459a29e3f9df907af0bd23f2840..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2f-9B/fd59fb1c-3681-44d2-9172-b10891ae9c55.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v2f-9B/1762652580.315967", - "retrieved_timestamp": "1762652580.315968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v2f-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v2f-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37911408396388246 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5192845467961766 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3231458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3503158244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3-Advanced-9B/778a10b0-c537-4592-9dbb-2b0de07ced4c.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3-Advanced-9B/778a10b0-c537-4592-9dbb-2b0de07ced4c.json deleted file mode 100644 index 
e367ecf9dd7556eb20d1c8b960911c939ddeb2cb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3-Advanced-9B/778a10b0-c537-4592-9dbb-2b0de07ced4c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3-Advanced-9B/1762652580.316169", - "retrieved_timestamp": "1762652580.316169", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6601816513517467 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5935146853737787 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18731117824773413 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44496874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41963098404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3b-9B/d048e6ad-cc57-4ebe-8376-262564e86f0c.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3b-9B/d048e6ad-cc57-4ebe-8376-262564e86f0c.json deleted file mode 100644 index 643f9026725a558be98635105c25e6dddc21f4a8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3b-9B/d048e6ad-cc57-4ebe-8376-262564e86f0c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3b-9B/1762652580.3163798", - "retrieved_timestamp": "1762652580.316381", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF 
Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v3b-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v3b-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6809144181881852 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5907698162898164 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44887499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4204621010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3i-9B/53602c70-73d9-461b-b27a-24c6a1a538e5.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3i-9B/53602c70-73d9-461b-b27a-24c6a1a538e5.json deleted file mode 100644 index 722bad59f83d9880d8a7634f41345b5d8778e6df..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3i-9B/53602c70-73d9-461b-b27a-24c6a1a538e5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3i-9B/1762652580.3165948", - "retrieved_timestamp": "1762652580.316596", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v3i-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v3i-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4203047912871182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5625750779805955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31806249999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41663896276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3j-9B/d435bd27-1c26-429d-8ac5-8fd8c591a9aa.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3j-9B/d435bd27-1c26-429d-8ac5-8fd8c591a9aa.json deleted file mode 100644 index 137075982eb90127bc6bcdb788b61599913ffd4c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3j-9B/d435bd27-1c26-429d-8ac5-8fd8c591a9aa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3j-9B/1762652580.3168168", - "retrieved_timestamp": "1762652580.316818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v3j-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v3j-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4169326276501904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5632286961183511 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1691842900302115 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31803125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41339760638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4-Advanced-9B/c0e95e3f-37a4-4b2f-a37b-37854546c241.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4-Advanced-9B/c0e95e3f-37a4-4b2f-a37b-37854546c241.json deleted file mode 100644 index 780ec306b06d01408ed368c6f95e1abdfa090081..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4-Advanced-9B/c0e95e3f-37a4-4b2f-a37b-37854546c241.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4-Advanced-9B/1762652580.317157", - "retrieved_timestamp": "1762652580.3171608", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7015474496558022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6023627309683861 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4580520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4366688829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4a-Advanced-9B/b84aedba-7b87-445d-87c2-b029cb0038c3.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4a-Advanced-9B/b84aedba-7b87-445d-87c2-b029cb0038c3.json deleted file mode 100644 index 848eef8ccc2617f82aaab208b90989e18ca24573..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4a-Advanced-9B/b84aedba-7b87-445d-87c2-b029cb0038c3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4a-Advanced-9B/1762652580.317515", - "retrieved_timestamp": "1762652580.317516", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7135123694020753 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.598838715496553 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44890625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4309341755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4b-9B/41f04f45-2f1d-42fd-87de-cc5e484cada2.json 
b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4b-9B/41f04f45-2f1d-42fd-87de-cc5e484cada2.json deleted file mode 100644 index a6d98fe5049cb8df7856388b9a6b72fd1f4ce2b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4b-9B/41f04f45-2f1d-42fd-87de-cc5e484cada2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4b-9B/1762652580.317803", - "retrieved_timestamp": "1762652580.317804", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v4b-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v4b-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6878338364428604 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6039158192304305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45547916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4356715425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4c-9B/9499ec24-5be2-478c-b13e-3102d1555668.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4c-9B/9499ec24-5be2-478c-b13e-3102d1555668.json deleted file mode 100644 index 4bc386bd597e35dcf53b73e602f5e4a54a37173f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4c-9B/9499ec24-5be2-478c-b13e-3102d1555668.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4c-9B/1762652580.318075", - "retrieved_timestamp": "1762652580.318076", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v4c-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v4c-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6945282960323054 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6084319292299174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22658610271903323 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45278124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43949468085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4d-9B/7e6685d8-af21-4810-a9cc-edb296f4b937.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4d-9B/7e6685d8-af21-4810-a9cc-edb296f4b937.json deleted file mode 100644 index ad5a8c68d73dc82cbc516b1bc894a92092508de0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4d-9B/7e6685d8-af21-4810-a9cc-edb296f4b937.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4d-9B/1762652580.318495", - "retrieved_timestamp": "1762652580.318496", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Gemma-2-Ataraxy-v4d-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "lemon07r/Gemma-2-Ataraxy-v4d-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7250029920610646 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6054158192304304 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4541458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4345910904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/lkoenig_BBAI_200_Gemma/b71c5ede-010d-4ce4-9f12-552388e2d9eb.json b/leaderboard_data/HFOpenLLMv2/google/lkoenig_BBAI_200_Gemma/b71c5ede-010d-4ce4-9f12-552388e2d9eb.json deleted file mode 100644 index b7d2d0d7db5b2a5a50ff1a5872f940c4bc4fa7f7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/lkoenig_BBAI_200_Gemma/b71c5ede-010d-4ce4-9f12-552388e2d9eb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_200_Gemma/1762652580.32272", - "retrieved_timestamp": "1762652580.32272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lkoenig/BBAI_200_Gemma", - "developer": "google", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_200_Gemma" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07051733843978422 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3449044607726533 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36311458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16788563829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 19.3 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/monsterapi_gemma-2-2b-LoRA-MonsterInstruct/f5395aa2-334b-410c-a2ee-4d7381f1c9bc.json b/leaderboard_data/HFOpenLLMv2/google/monsterapi_gemma-2-2b-LoRA-MonsterInstruct/f5395aa2-334b-410c-a2ee-4d7381f1c9bc.json deleted file mode 100644 index fc07be68fe1d818626ab0b126d9590e23737ccf3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/monsterapi_gemma-2-2b-LoRA-MonsterInstruct/f5395aa2-334b-410c-a2ee-4d7381f1c9bc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/monsterapi_gemma-2-2b-LoRA-MonsterInstruct/1762652580.372597", - "retrieved_timestamp": "1762652580.372598", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "monsterapi/gemma-2-2b-LoRA-MonsterInstruct", - "developer": "google", - "inference_platform": "unknown", - "id": "monsterapi/gemma-2-2b-LoRA-MonsterInstruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3902545246612322 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36496861927498697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3643854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19872007978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/nbeerbower_Gemma2-Gutenberg-Doppel-9B/b6514bef-f106-45e0-8571-da3507b0e95b.json b/leaderboard_data/HFOpenLLMv2/google/nbeerbower_Gemma2-Gutenberg-Doppel-9B/b6514bef-f106-45e0-8571-da3507b0e95b.json deleted file mode 100644 index 86a654c2aa0ddb8ae5cfc508684c4d30cbfe975c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/nbeerbower_Gemma2-Gutenberg-Doppel-9B/b6514bef-f106-45e0-8571-da3507b0e95b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Gemma2-Gutenberg-Doppel-9B/1762652580.378716", - "retrieved_timestamp": "1762652580.378717", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Gemma2-Gutenberg-Doppel-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "nbeerbower/Gemma2-Gutenberg-Doppel-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7171094917042337 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5870114193661848 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19788519637462235 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46078125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41273271276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/google/nbeerbower_gemma2-gutenberg-27B/b0a9fb09-2637-4b4c-9d78-7dc8d9c6aad2.json b/leaderboard_data/HFOpenLLMv2/google/nbeerbower_gemma2-gutenberg-27B/b0a9fb09-2637-4b4c-9d78-7dc8d9c6aad2.json deleted file mode 100644 index 54c32c1194eac9632ce41a6ea4badfa2e54c14ee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/nbeerbower_gemma2-gutenberg-27B/b0a9fb09-2637-4b4c-9d78-7dc8d9c6aad2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_gemma2-gutenberg-27B/1762652580.384448", - "retrieved_timestamp": "1762652580.3844512", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/gemma2-gutenberg-27B", - "developer": "google", - "inference_platform": "unknown", - "id": "nbeerbower/gemma2-gutenberg-27B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29470804133033685 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37965683503451614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3727291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19822140957446807 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 27.227 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/nbeerbower_gemma2-gutenberg-9B/14dc56ff-7f3b-430e-a4b3-6e4c9961fea3.json b/leaderboard_data/HFOpenLLMv2/google/nbeerbower_gemma2-gutenberg-9B/14dc56ff-7f3b-430e-a4b3-6e4c9961fea3.json deleted file mode 100644 index ff31bbd2402f3b3def4d04015b53bd505190b328..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/nbeerbower_gemma2-gutenberg-9B/14dc56ff-7f3b-430e-a4b3-6e4c9961fea3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_gemma2-gutenberg-9B/1762652580.384712", 
- "retrieved_timestamp": "1762652580.384713", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/gemma2-gutenberg-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "nbeerbower/gemma2-gutenberg-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2795948084416016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5950904001490335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45951041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4192154255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/nhyha_N3N_gemma-2-9b-it_20241029_1532/cb85dee2-acee-48f8-85aa-1d5664179fd5.json b/leaderboard_data/HFOpenLLMv2/google/nhyha_N3N_gemma-2-9b-it_20241029_1532/cb85dee2-acee-48f8-85aa-1d5664179fd5.json deleted file mode 100644 index de663d3ec4d050190d770145cac5f661b12a4a7f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/nhyha_N3N_gemma-2-9b-it_20241029_1532/cb85dee2-acee-48f8-85aa-1d5664179fd5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nhyha_N3N_gemma-2-9b-it_20241029_1532/1762652580.4059799", - "retrieved_timestamp": "1762652580.4059808", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nhyha/N3N_gemma-2-9b-it_20241029_1532", - "developer": "google", - "inference_platform": "unknown", - "id": "nhyha/N3N_gemma-2-9b-it_20241029_1532" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6751940407008958 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5863124381827675 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2122356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4593541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4122340425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/nhyha_N3N_gemma-2-9b-it_20241110_2026/4c450b48-8477-45cb-9cfa-814c21dd39d7.json b/leaderboard_data/HFOpenLLMv2/google/nhyha_N3N_gemma-2-9b-it_20241110_2026/4c450b48-8477-45cb-9cfa-814c21dd39d7.json deleted file mode 100644 index 9ec2d6cd1b1ed0edeef3b58ab16e01a55b97c121..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/nhyha_N3N_gemma-2-9b-it_20241110_2026/4c450b48-8477-45cb-9cfa-814c21dd39d7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nhyha_N3N_gemma-2-9b-it_20241110_2026/1762652580.406234", - "retrieved_timestamp": "1762652580.406235", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nhyha/N3N_gemma-2-9b-it_20241110_2026", - "developer": "google", - "inference_platform": "unknown", - "id": "nhyha/N3N_gemma-2-9b-it_20241110_2026" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6282829558903709 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5867149609980419 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1608761329305136 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40730208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40201130319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/nidum_Nidum-Limitless-Gemma-2B/49e352c1-2319-4bc5-aa3f-1697739a05b8.json b/leaderboard_data/HFOpenLLMv2/google/nidum_Nidum-Limitless-Gemma-2B/49e352c1-2319-4bc5-aa3f-1697739a05b8.json deleted file mode 100644 index 4268cf870bd3c518f358466ffcf2d73e04a798b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/nidum_Nidum-Limitless-Gemma-2B/49e352c1-2319-4bc5-aa3f-1697739a05b8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nidum_Nidum-Limitless-Gemma-2B/1762652580.406632", - "retrieved_timestamp": "1762652580.406633", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nidum/Nidum-Limitless-Gemma-2B", - "developer": "google", - "inference_platform": "unknown", - "id": "nidum/Nidum-Limitless-Gemma-2B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24235140538216376 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3078801520076317 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11735372340425532 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GemmaForCausalLM", - "params_billions": 2.506 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/noname0202_gemma-2-2b-it-ties/42bed40b-ac71-42c8-b56b-47d1f930c736.json b/leaderboard_data/HFOpenLLMv2/google/noname0202_gemma-2-2b-it-ties/42bed40b-ac71-42c8-b56b-47d1f930c736.json deleted file mode 100644 index 9cc58ee0ff5edbf46d2c3384e87a112c9982dd1c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/noname0202_gemma-2-2b-it-ties/42bed40b-ac71-42c8-b56b-47d1f930c736.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/noname0202_gemma-2-2b-it-ties/1762652580.4097438", - "retrieved_timestamp": "1762652580.409745", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "noname0202/gemma-2-2b-it-ties", - "developer": "google", - "inference_platform": "unknown", - "id": "noname0202/gemma-2-2b-it-ties" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12657083205893696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42057403060290816 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39288541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2560671542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at 
end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/princeton-nlp_gemma-2-9b-it-SimPO/4285b38c-aba8-444b-9b0b-b265c7b1fef1.json b/leaderboard_data/HFOpenLLMv2/google/princeton-nlp_gemma-2-9b-it-SimPO/4285b38c-aba8-444b-9b0b-b265c7b1fef1.json deleted file mode 100644 index d17aed9d2754b6734e25400ffd2214e2cb2dd7fe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/princeton-nlp_gemma-2-9b-it-SimPO/4285b38c-aba8-444b-9b0b-b265c7b1fef1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_gemma-2-9b-it-SimPO/1762652580.454763", - "retrieved_timestamp": "1762652580.4547682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/gemma-2-9b-it-SimPO", - "developer": "google", - "inference_platform": "unknown", - "id": "princeton-nlp/gemma-2-9b-it-SimPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3206857803960159 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5839179923162123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41232291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39752327127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/qq8933_OpenLongCoT-Base-Gemma2-2B/c945b9b5-7b46-4300-adcc-2d6c94df0ac1.json b/leaderboard_data/HFOpenLLMv2/google/qq8933_OpenLongCoT-Base-Gemma2-2B/c945b9b5-7b46-4300-adcc-2d6c94df0ac1.json deleted file mode 100644 index 1a960d5f0572403e1e4464b3f4bcc60de81b7679..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/qq8933_OpenLongCoT-Base-Gemma2-2B/c945b9b5-7b46-4300-adcc-2d6c94df0ac1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/qq8933_OpenLongCoT-Base-Gemma2-2B/1762652580.488883", - "retrieved_timestamp": "1762652580.488883", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qq8933/OpenLongCoT-Base-Gemma2-2B", - "developer": "google", - "inference_platform": "unknown", - "id": "qq8933/OpenLongCoT-Base-Gemma2-2B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1965141380426158 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3106362870893106 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32225 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1315658244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 3.204 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/054a662a-e425-448c-9556-6998833e51ff.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/054a662a-e425-448c-9556-6998833e51ff.json deleted file mode 100644 index 13538a71529710db6ca718c9ef103921cddb9f53..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/054a662a-e425-448c-9556-6998833e51ff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/1762652580.491333", - "retrieved_timestamp": "1762652580.491333", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", - "developer": 
"google", - "inference_platform": "unknown", - "id": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7648949232480928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.597438766061506 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4244791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4207114361702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/0a685d8f-38c7-4521-9613-7b36ad1cac73.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/0a685d8f-38c7-4521-9613-7b36ad1cac73.json deleted file mode 100644 index b6c5609aa326d3e4f20fbc4f74166691ee7818be..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/0a685d8f-38c7-4521-9613-7b36ad1cac73.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/1762652580.491603", - "retrieved_timestamp": "1762652580.491603", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28536505361330156 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5983926033872208 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10045317220543806 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46065625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4162234042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.1/d31a41b0-6500-4e1b-8435-b9d3e9725c02.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.1/d31a41b0-6500-4e1b-8435-b9d3e9725c02.json deleted file mode 100644 index 70a862a7728aab34f128f730f1b7903f001528ab..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.1/d31a41b0-6500-4e1b-8435-b9d3e9725c02.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.1/1762652580.491797", - "retrieved_timestamp": "1762652580.491798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "recoilme/recoilme-gemma-2-9B-v0.1", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/recoilme-gemma-2-9B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.751506004069203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5995309756292291 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41914583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4158909574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.2/5826c93f-3642-44cf-b385-4a5ab5103086.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.2/5826c93f-3642-44cf-b385-4a5ab5103086.json deleted file mode 100644 index 3e67a6fa1eab3dcee5fb9d571700d2bb112692b3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.2/5826c93f-3642-44cf-b385-4a5ab5103086.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.2/1762652580.4922318", - "retrieved_timestamp": "1762652580.492233", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "recoilme/recoilme-gemma-2-9B-v0.2", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/recoilme-gemma-2-9B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2746989100032359 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6030832642626502 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46859375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4122340425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.2/6a15378c-36cc-4f5e-b184-5a19a6fbb192.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.2/6a15378c-36cc-4f5e-b184-5a19a6fbb192.json deleted file mode 100644 index dc617e475e34da317650569cb0f474e34b09f6ff..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.2/6a15378c-36cc-4f5e-b184-5a19a6fbb192.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.2/1762652580.492019", - "retrieved_timestamp": "1762652580.49202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "recoilme/recoilme-gemma-2-9B-v0.2", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/recoilme-gemma-2-9B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7591745457608035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6025964285724085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.409875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41630651595744683 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.3/47cfe707-ba31-4c9b-aa15-9ab8b566e206.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.3/47cfe707-ba31-4c9b-aa15-9ab8b566e206.json deleted file mode 100644 index 19c46567c7647d43e8f412b30653981644d18f2c..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.3/47cfe707-ba31-4c9b-aa15-9ab8b566e206.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.3/1762652580.492416", - "retrieved_timestamp": "1762652580.492416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "recoilme/recoilme-gemma-2-9B-v0.3", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/recoilme-gemma-2-9B-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.743937197746424 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5992527878628748 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4203854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4072473404255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.3/8d3bd687-89f5-4d62-af46-93646aea4341.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.3/8d3bd687-89f5-4d62-af46-93646aea4341.json deleted file mode 100644 index 9dee920187aa5a737ff56cdc72c0eccef0815183..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.3/8d3bd687-89f5-4d62-af46-93646aea4341.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.3/1762652580.492666", - "retrieved_timestamp": "1762652580.492667", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "recoilme/recoilme-gemma-2-9B-v0.3", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/recoilme-gemma-2-9B-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.57607592299543 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6019827101058847 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46322916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4039228723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.4/28eef1b7-a83e-49c9-8f11-ef9e4ae7e1ce.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.4/28eef1b7-a83e-49c9-8f11-ef9e4ae7e1ce.json deleted file mode 100644 index 26aa56d1c7fb267a171921ab3bffbfc550013c7a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.4/28eef1b7-a83e-49c9-8f11-ef9e4ae7e1ce.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.4/1762652580.4928808", - "retrieved_timestamp": "1762652580.492882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "recoilme/recoilme-gemma-2-9B-v0.4", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/recoilme-gemma-2-9B-v0.4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2561891337207498 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5967285833554881 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4726875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4405751329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.5/8fe5a1e8-1491-4e64-8aed-32e73f2dae6e.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.5/8fe5a1e8-1491-4e64-8aed-32e73f2dae6e.json deleted file mode 100644 index 00374765572366b1d089bfc9194e29b4bf3440e1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.5/8fe5a1e8-1491-4e64-8aed-32e73f2dae6e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.5/1762652580.4931269", - "retrieved_timestamp": "1762652580.493134", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "recoilme/recoilme-gemma-2-9B-v0.5", - "developer": "google", - "inference_platform": "unknown", - "id": "recoilme/recoilme-gemma-2-9B-v0.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7664186580495308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5981472549925003 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy 
on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4231770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41996343085106386 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/sequelbox_gemma-2-9B-MOTH/4bdefb85-2413-43b7-8938-869ad0cff58f.json b/leaderboard_data/HFOpenLLMv2/google/sequelbox_gemma-2-9B-MOTH/4bdefb85-2413-43b7-8938-869ad0cff58f.json deleted file mode 100644 index b7e8a93e4433ac86ab62c946eac76263b683f753..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/sequelbox_gemma-2-9B-MOTH/4bdefb85-2413-43b7-8938-869ad0cff58f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sequelbox_gemma-2-9B-MOTH/1762652580.5126731", - "retrieved_timestamp": "1762652580.512674", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sequelbox/gemma-2-9B-MOTH", - "developer": "google", - "inference_platform": "unknown", - "id": "sequelbox/gemma-2-9B-MOTH" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20588150551647405 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30797000521562534 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3409479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.11402925531914894 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/wzhouad_gemma-2-9b-it-WPO-HB/70fe199f-6c81-4d99-a595-208b7abc321f.json b/leaderboard_data/HFOpenLLMv2/google/wzhouad_gemma-2-9b-it-WPO-HB/70fe199f-6c81-4d99-a595-208b7abc321f.json deleted file mode 100644 index 74263acd90c9f652662d56c5d0ec9927008ff27a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/wzhouad_gemma-2-9b-it-WPO-HB/70fe199f-6c81-4d99-a595-208b7abc321f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/wzhouad_gemma-2-9b-it-WPO-HB/1762652580.596365", - "retrieved_timestamp": "1762652580.5963662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "wzhouad/gemma-2-9b-it-WPO-HB", - "developer": "google", - "inference_platform": "unknown", - "id": "wzhouad/gemma-2-9b-it-WPO-HB" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5437029304467702 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5628624376751974 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3498322147651007 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33602061170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/9c7a213f-e5f8-4cc2-9cbe-d61db2cf2bbe.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/9c7a213f-e5f8-4cc2-9cbe-d61db2cf2bbe.json deleted file mode 100644 index 2cd22138a8e9202be87e502bec7565ba908aa850..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/9c7a213f-e5f8-4cc2-9cbe-d61db2cf2bbe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/1762652580.609323", - "retrieved_timestamp": "1762652580.609324", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5218209905273563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.414688942270627 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35139583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24609375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18/23800723-b5bd-4fc6-9d07-ca937c8680c6.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18/23800723-b5bd-4fc6-9d07-ca937c8680c6.json deleted file mode 100644 index 20e1bc4ed611432d30a06aac8493ccc6d8000e11..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18/23800723-b5bd-4fc6-9d07-ca937c8680c6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18/1762652580.6090298", - "retrieved_timestamp": "1762652580.609031", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM 
v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4630945890237902 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4052902505118913 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3754270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23445811170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-18-24/7321bd04-6f20-427a-8219-0ff2e299cb01.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-18-24/7321bd04-6f20-427a-8219-0ff2e299cb01.json deleted file mode 100644 index 1d85b8c3b91b65562608327add5d7c85a09852f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-18-24/7321bd04-6f20-427a-8219-0ff2e299cb01.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17-18-24/1762652580.609858", - "retrieved_timestamp": "1762652580.609859", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.505484337114412 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38123590457353557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35015625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2282247340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/3cc8621a-b38c-4735-af09-027989774289.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/3cc8621a-b38c-4735-af09-027989774289.json deleted file mode 100644 index aac637ccc83c5f24ed4026a17f576a4372cca891..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/3cc8621a-b38c-4735-af09-027989774289.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/1762652580.6102881", - "retrieved_timestamp": "1762652580.6102889", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30647349033896726 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40715971926711275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39691666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2249002659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO/44b47789-f529-4bae-9e87-196abc325efc.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO/44b47789-f529-4bae-9e87-196abc325efc.json deleted file mode 100644 index 195b074417c46e7dd07e5f7a6d565510147ac23c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO/44b47789-f529-4bae-9e87-196abc325efc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO/1762652580.610075", - "retrieved_timestamp": "1762652580.610076", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47478468242042227 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38979797271028965 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy 
on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37676041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21908244680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17/5958a61d-bf39-4de4-bfe1-6a6db2f37f55.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17/5958a61d-bf39-4de4-bfe1-6a6db2f37f55.json deleted file mode 100644 index 7e09f976f96d8c9bec94453d22992b3a3687cbc2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17/5958a61d-bf39-4de4-bfe1-6a6db2f37f55.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17/1762652580.609628", - "retrieved_timestamp": "1762652580.609628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5081572449988254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40762664531580056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37006249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2455119680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 
2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-18-ORPO/c91ab7d1-b36e-45ca-8f1e-ad9ef0c38100.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-18-ORPO/c91ab7d1-b36e-45ca-8f1e-ad9ef0c38100.json deleted file mode 100644 index 5f7750677b282dd6682132b8ba4bd6ad0e706631..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-18-ORPO/c91ab7d1-b36e-45ca-8f1e-ad9ef0c38100.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-18-ORPO/1762652580.610698", - "retrieved_timestamp": "1762652580.610699", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47423502972113984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40389353402379324 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3953333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21850066489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-18/78f235b0-fa98-48e2-bb03-9f7e9f986004.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-18/78f235b0-fa98-48e2-bb03-9f7e9f986004.json deleted file mode 100644 index 2314c9e0cc9929f9de5dd0317a7dab8a7aa69e0f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-18/78f235b0-fa98-48e2-bb03-9f7e9f986004.json 
+++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-18/1762652580.610494", - "retrieved_timestamp": "1762652580.610495", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-18", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-18" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5175246124726836 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4132188791645781 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37415624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25049867021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-24/4f0262d9-2a01-4127-bb40-1bbf437bbc07.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-24/4f0262d9-2a01-4127-bb40-1bbf437bbc07.json deleted file mode 100644 index 74e31d307bd6a03504d319c8714777bacd212776..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-24/4f0262d9-2a01-4127-bb40-1bbf437bbc07.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-24/1762652580.610902", - "retrieved_timestamp": "1762652580.610903", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - 
"model_info": { - "name": "ymcki/gemma-2-2b-jpn-it-abliterated-24", - "developer": "google", - "inference_platform": "unknown", - "id": "ymcki/gemma-2-2b-jpn-it-abliterated-24" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49786566310722213 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41096027770392857 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39148958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2473404255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zake7749_gemma-2-9b-it-chinese-kyara/827af354-0efb-4a44-b62a-c8562fd0065b.json b/leaderboard_data/HFOpenLLMv2/google/zake7749_gemma-2-9b-it-chinese-kyara/827af354-0efb-4a44-b62a-c8562fd0065b.json deleted file mode 100644 index d820e36bfd9482a4022326b91ff523051a75d920..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zake7749_gemma-2-9b-it-chinese-kyara/827af354-0efb-4a44-b62a-c8562fd0065b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zake7749_gemma-2-9b-it-chinese-kyara/1762652580.612564", - "retrieved_timestamp": "1762652580.612565", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zake7749/gemma-2-9b-it-chinese-kyara", - "developer": "google", - "inference_platform": "unknown", - "id": "zake7749/gemma-2-9b-it-chinese-kyara" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17642965110351644 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5953692987878404 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4241979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41788563829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_Gemma-2-TM-9B/4d3c877e-3dea-44af-8133-d555355971f8.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_Gemma-2-TM-9B/4d3c877e-3dea-44af-8133-d555355971f8.json deleted file mode 100644 index 382c5b725cfdb3ec0163eb26882f487e2aeab67c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_Gemma-2-TM-9B/4d3c877e-3dea-44af-8133-d555355971f8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_Gemma-2-TM-9B/1762652580.612811", - "retrieved_timestamp": "1762652580.612811", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/Gemma-2-TM-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/Gemma-2-TM-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8044621604010691 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5986592993557701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41523958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40882646276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen1-gemma-2-9B/119f453d-714d-4324-aac5-8448bab91771.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen1-gemma-2-9B/119f453d-714d-4324-aac5-8448bab91771.json deleted file mode 100644 index b97807cd85096add27d841ff2d341a25cf0472d9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen1-gemma-2-9B/119f453d-714d-4324-aac5-8448bab91771.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen1-gemma-2-9B/1762652580.613055", - "retrieved_timestamp": "1762652580.613056", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Gen1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen1-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7886252920029965 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6099997385328262 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22205438066465258 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4216875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380817819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen2-GI-gemma-2-9B/0cf7e394-67e2-4ca3-ab2e-00cd4165eaf8.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen2-GI-gemma-2-9B/0cf7e394-67e2-4ca3-ab2e-00cd4165eaf8.json deleted file mode 100644 index e3a3d43605aca663f28682676b8793734dca11fc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen2-GI-gemma-2-9B/0cf7e394-67e2-4ca3-ab2e-00cd4165eaf8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen2-GI-gemma-2-9B/1762652580.613308", - "retrieved_timestamp": "1762652580.613309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Gen2-GI-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen2-GI-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7913979352562313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6095558882654465 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42832291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43558843085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen2-gemma-2-9B/6f5cbf98-67b4-4651-acee-160fe2e36f59.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen2-gemma-2-9B/6f5cbf98-67b4-4651-acee-160fe2e36f59.json deleted file mode 100644 index 00e616cc7b8ffb9c6ba0f5d56555c49417f08555..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen2-gemma-2-9B/6f5cbf98-67b4-4651-acee-160fe2e36f59.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/zelk12_MT-Gen2-gemma-2-9B/1762652580.613527", - "retrieved_timestamp": "1762652580.613528", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Gen2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen2-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7907485471881275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6100494662695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4322916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen3-gemma-2-9B/79319862-c5eb-40a1-9424-ecc3835c1c9e.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen3-gemma-2-9B/79319862-c5eb-40a1-9424-ecc3835c1c9e.json deleted file mode 100644 index 7de311dfec37c38ae434c9799d5c13335068b6a7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen3-gemma-2-9B/79319862-c5eb-40a1-9424-ecc3835c1c9e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen3-gemma-2-9B/1762652580.613742", - "retrieved_timestamp": "1762652580.613743", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Gen3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen3-gemma-2-9B" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8020142111818863 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6097112889343964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4216875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43558843085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen4-gemma-2-9B/7442a4c1-e225-4cea-b107-2d975460e214.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen4-gemma-2-9B/7442a4c1-e225-4cea-b107-2d975460e214.json deleted file mode 100644 index e8088598e56c027380156297898b62ed9a5134d1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen4-gemma-2-9B/7442a4c1-e225-4cea-b107-2d975460e214.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen4-gemma-2-9B/1762652580.613958", - "retrieved_timestamp": "1762652580.6139588", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Gen4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen4-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7883005979689446 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6109884725351095 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22356495468277945 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4228020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen5-gemma-2-9B/4431b126-a8b8-4776-8dd5-448ec4fb0caf.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen5-gemma-2-9B/4431b126-a8b8-4776-8dd5-448ec4fb0caf.json deleted file mode 100644 index 3f2ca2a0cf105aa28bec766e00c1906896f8f135..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen5-gemma-2-9B/4431b126-a8b8-4776-8dd5-448ec4fb0caf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen5-gemma-2-9B/1762652580.614163", - "retrieved_timestamp": "1762652580.614163", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Gen5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen5-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7923221496739761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6132787046647334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.42016666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4402426861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen6-gemma-2-9B/2dc22f82-e2fb-4690-b8e6-8c77b9bc9c45.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen6-gemma-2-9B/2dc22f82-e2fb-4690-b8e6-8c77b9bc9c45.json deleted file mode 100644 index a341a3170da30123fb296ff13b8d81ddbce2fb5d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen6-gemma-2-9B/2dc22f82-e2fb-4690-b8e6-8c77b9bc9c45.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen6-gemma-2-9B/1762652580.614364", - "retrieved_timestamp": "1762652580.6143649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Gen6-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen6-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1615668648075994 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5844669261858688 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0823262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40692708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4165558510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen6fix-gemma-2-9B/0c2ec793-573d-4fb5-abc3-4aef4a8e2e72.json 
b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen6fix-gemma-2-9B/0c2ec793-573d-4fb5-abc3-4aef4a8e2e72.json deleted file mode 100644 index e31b363e11206a27e3471fa4e3c0f395edf403ae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen6fix-gemma-2-9B/0c2ec793-573d-4fb5-abc3-4aef4a8e2e72.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen6fix-gemma-2-9B/1762652580.614617", - "retrieved_timestamp": "1762652580.614618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Gen6fix-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen6fix-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15759518078697854 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5917309697578781 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40841666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4119847074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen7-gemma-2-9B/29e65163-3e59-4bfe-a950-60092cb3171f.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen7-gemma-2-9B/29e65163-3e59-4bfe-a950-60092cb3171f.json deleted file mode 100644 index 6022ec6903cedd7faf81879be8ff8f47a4d4ee63..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen7-gemma-2-9B/29e65163-3e59-4bfe-a950-60092cb3171f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen7-gemma-2-9B/1762652580.614857", - "retrieved_timestamp": "1762652580.614858", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Gen7-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Gen7-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16641289556155447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5935242633580781 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40978125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4122340425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Max-Merge_02012025163610-gemma-2-9B/bfeb5972-e865-4892-b01b-0c92fdab79e9.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Max-Merge_02012025163610-gemma-2-9B/bfeb5972-e865-4892-b01b-0c92fdab79e9.json deleted file mode 100644 index 0e2e8786d357247e09557b746269cb368f18628d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Max-Merge_02012025163610-gemma-2-9B/bfeb5972-e865-4892-b01b-0c92fdab79e9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Max-Merge_02012025163610-gemma-2-9B/1762652580.6150799", - "retrieved_timestamp": "1762652580.615081", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Max-Merge_02012025163610-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Max-Merge_02012025163610-gemma-2-9B" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7907485471881275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6142243374633075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4228020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395777925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge-gemma-2-9B/8025c7ed-3553-489f-8858-091d1ff81a15.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge-gemma-2-9B/8025c7ed-3553-489f-8858-091d1ff81a15.json deleted file mode 100644 index fba2dc13cd0690565cb2e225fefbef73183b138c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge-gemma-2-9B/8025c7ed-3553-489f-8858-091d1ff81a15.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge-gemma-2-9B/1762652580.615297", - "retrieved_timestamp": "1762652580.615297", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Merge-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8035379459833243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6118379158679297 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34815436241610737 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43617021276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge1-gemma-2-9B/0e6d9dcd-e9b7-4638-ac0a-d0600fbb27d8.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge1-gemma-2-9B/0e6d9dcd-e9b7-4638-ac0a-d0600fbb27d8.json deleted file mode 100644 index 20019f265c68a1996ab721f261eb4572ea1cac0c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge1-gemma-2-9B/0e6d9dcd-e9b7-4638-ac0a-d0600fbb27d8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge1-gemma-2-9B/1762652580.615506", - "retrieved_timestamp": "1762652580.615506", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Merge1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge1-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7901490268044344 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6099997385328262 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22885196374622357 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4243854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43741688829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/b149c82e-0099-46f6-a302-0eac4127f418.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/b149c82e-0099-46f6-a302-0eac4127f418.json deleted file mode 100644 index 9c3466b8e012c7f4979eaf59207c9312d66e0acd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/b149c82e-0099-46f6-a302-0eac4127f418.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/1762652580.615718", - "retrieved_timestamp": "1762652580.615718", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7955945779420825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.60838922159878 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43222916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.437250664893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge2-gemma-2-9B/75c81dae-2bb9-4d60-94e2-61141c31ccbd.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge2-gemma-2-9B/75c81dae-2bb9-4d60-94e2-61141c31ccbd.json deleted file mode 100644 index 483bd8027994e052734c659506bccd80a72989a4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge2-gemma-2-9B/75c81dae-2bb9-4d60-94e2-61141c31ccbd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge2-gemma-2-9B/1762652580.615932", - "retrieved_timestamp": "1762652580.615933", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Merge2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge2-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7877010775852515 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6106681877306871 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2348942598187311 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4216875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43816489361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge3-gemma-2-9B/c2bad77e-c0d0-4a43-8853-9363cc618603.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge3-gemma-2-9B/c2bad77e-c0d0-4a43-8853-9363cc618603.json deleted file mode 100644 index bbb9c2c47598d2270f89283bb24dba550e3065c1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge3-gemma-2-9B/c2bad77e-c0d0-4a43-8853-9363cc618603.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge3-gemma-2-9B/1762652580.6161401", - "retrieved_timestamp": "1762652580.616141", 
- "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Merge3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge3-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7858526487497617 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6102112889343964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4373337765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge4-gemma-2-9B/7b515db9-e76c-495f-b4f8-a65b913f40e9.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge4-gemma-2-9B/7b515db9-e76c-495f-b4f8-a65b913f40e9.json deleted file mode 100644 index 9b8f5be8ff8be9df3dd28c2d0361e74fbe678951..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge4-gemma-2-9B/7b515db9-e76c-495f-b4f8-a65b913f40e9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge4-gemma-2-9B/1762652580.616342", - "retrieved_timestamp": "1762652580.616342", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Merge4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge4-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7807317916461656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6118218058684427 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21676737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42943749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43899601063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge5-gemma-2-9B/f9e1d208-d1ab-4518-9b1b-1470af8bef12.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge5-gemma-2-9B/f9e1d208-d1ab-4518-9b1b-1470af8bef12.json deleted file mode 100644 index a460e1d644d9f2e9c9b9a27a414a2ad4735754e7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge5-gemma-2-9B/f9e1d208-d1ab-4518-9b1b-1470af8bef12.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge5-gemma-2-9B/1762652580.616543", - "retrieved_timestamp": "1762652580.616544", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Merge5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge5-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7843787816327346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6122674386670167 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42813541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge6-gemma-2-9B/3c796c74-d79c-4c9f-a5ab-dee6c237bde1.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge6-gemma-2-9B/3c796c74-d79c-4c9f-a5ab-dee6c237bde1.json deleted file mode 100644 index d86f6b21db1b0428bd52dcd1561a885ad1dc92f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge6-gemma-2-9B/3c796c74-d79c-4c9f-a5ab-dee6c237bde1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge6-gemma-2-9B/1762652580.6167512", - "retrieved_timestamp": "1762652580.6167512", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-Merge6-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-Merge6-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16946036516443036 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5949106849534558 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40978125 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41148603723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-gemma-2-9B/061fc038-b3fd-4d5b-8ab7-7f3713ad9e55.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-gemma-2-9B/061fc038-b3fd-4d5b-8ab7-7f3713ad9e55.json deleted file mode 100644 index 47a3fa2f1179da5eddb3217e43e7ffb3489a698d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-gemma-2-9B/061fc038-b3fd-4d5b-8ab7-7f3713ad9e55.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT-gemma-2-9B/1762652580.616956", - "retrieved_timestamp": "1762652580.616957", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7968434863938794 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6063604478633632 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40711458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42237367021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen1-gemma-2-9B/b869eab0-f736-48ef-8870-b98636cc4da1.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen1-gemma-2-9B/b869eab0-f736-48ef-8870-b98636cc4da1.json deleted file mode 100644 index 
6755168ff6068bf86485747d348686b7dd9846f2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen1-gemma-2-9B/b869eab0-f736-48ef-8870-b98636cc4da1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen1-gemma-2-9B/1762652580.617173", - "retrieved_timestamp": "1762652580.617174", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT1-Gen1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen1-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7974430067775724 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6117787046647335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43095833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43758311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen2-gemma-2-9B/2871c1f6-4010-48e4-8020-1c5024474934.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen2-gemma-2-9B/2871c1f6-4010-48e4-8020-1c5024474934.json deleted file mode 100644 index 73d06534a6ad690d2ee3288a0c279e033a905252..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen2-gemma-2-9B/2871c1f6-4010-48e4-8020-1c5024474934.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen2-gemma-2-9B/1762652580.617375", - "retrieved_timestamp": "1762652580.617376", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT1-Gen2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen2-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7983672211953173 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6095989894691557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22507552870090636 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42835416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43550531914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen3-gemma-2-9B/69b008dd-f8ad-49ce-9bca-fff2e2ce6b72.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen3-gemma-2-9B/69b008dd-f8ad-49ce-9bca-fff2e2ce6b72.json deleted file mode 100644 index 293264853e285c9659b0716a50ec5fcca9388d26..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen3-gemma-2-9B/69b008dd-f8ad-49ce-9bca-fff2e2ce6b72.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen3-gemma-2-9B/1762652580.617578", - "retrieved_timestamp": "1762652580.617579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT1-Gen3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen3-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.795969139660545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6101551392017761 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42432291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43492353723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen4-gemma-2-9B/e10f8a93-7131-446d-b792-d179f522a262.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen4-gemma-2-9B/e10f8a93-7131-446d-b792-d179f522a262.json deleted file mode 100644 index b2730bf8e6b31d5f3641c7faf44d9277e9ca38ec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen4-gemma-2-9B/e10f8a93-7131-446d-b792-d179f522a262.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen4-gemma-2-9B/1762652580.617781", - "retrieved_timestamp": "1762652580.617782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT1-Gen4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen4-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7941207108250552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6057567677609054 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21601208459214502 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42311458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42860704787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen5-IF-gemma-2-S2DMv1-9B/182a7558-c9f7-43a6-a928-d5d97e082a91.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen5-IF-gemma-2-S2DMv1-9B/182a7558-c9f7-43a6-a928-d5d97e082a91.json deleted file mode 100644 index cf823339b764f4e2c911be85bdc4ad637853c364..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen5-IF-gemma-2-S2DMv1-9B/182a7558-c9f7-43a6-a928-d5d97e082a91.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen5-IF-gemma-2-S2DMv1-9B/1762652580.617982", - "retrieved_timestamp": "1762652580.6179829", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7929216700576691 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6000001533684681 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4244791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.42179188829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen5-gemma-2-9B/46f2caf1-29e8-4173-b2b2-e54e905e71d9.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen5-gemma-2-9B/46f2caf1-29e8-4173-b2b2-e54e905e71d9.json deleted file mode 100644 index 54c5be383e476174fa0284d163fc489bc09a74b3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen5-gemma-2-9B/46f2caf1-29e8-4173-b2b2-e54e905e71d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen5-gemma-2-9B/1762652580.618199", - "retrieved_timestamp": "1762652580.6182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT1-Gen5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen5-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7794828831943688 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6017455017631886 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20770392749244712 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41914583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42220744680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen6-gemma-2-9B/fcf4087e-9d89-4e8a-a817-6c9092445208.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen6-gemma-2-9B/fcf4087e-9d89-4e8a-a817-6c9092445208.json deleted file mode 100644 index a585f0f127744a9cae45fb22d506fd0f75ee5aff..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen6-gemma-2-9B/fcf4087e-9d89-4e8a-a817-6c9092445208.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen6-gemma-2-9B/1762652580.618452", - "retrieved_timestamp": "1762652580.618453", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT1-Gen6-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen6-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16336542595867853 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5943545352208355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40444791666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4133144946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen7-gemma-2-9B/5b8bdeea-19cf-41c0-890a-55ae1b740e75.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen7-gemma-2-9B/5b8bdeea-19cf-41c0-890a-55ae1b740e75.json deleted file mode 100644 index 7c6afdc0ddaf4a0716200ac959cd0735fe2cde97..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen7-gemma-2-9B/5b8bdeea-19cf-41c0-890a-55ae1b740e75.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen7-gemma-2-9B/1762652580.6186602", - "retrieved_timestamp": "1762652580.6186612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "zelk12/MT1-Gen7-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Gen7-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16336542595867853 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5937953240176393 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41111458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144780585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Max-Merge_02012025163610-gemma-2-9B/01fcc284-cedc-48b7-bc21-b8ec6dd53d3c.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Max-Merge_02012025163610-gemma-2-9B/01fcc284-cedc-48b7-bc21-b8ec6dd53d3c.json deleted file mode 100644 index 50563e4b84abc64780fd7024c3897cc92ec3d8e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Max-Merge_02012025163610-gemma-2-9B/01fcc284-cedc-48b7-bc21-b8ec6dd53d3c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-Max-Merge_02012025163610-gemma-2-9B/1762652580.618859", - "retrieved_timestamp": "1762652580.61886", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7928718023732585 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6122674386670167 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22280966767371602 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4255 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43816489361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-gemma-2-9B/17cda965-9f4b-411c-977f-1fe3238f527f.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-gemma-2-9B/17cda965-9f4b-411c-977f-1fe3238f527f.json deleted file mode 100644 index 6de14f5428d9ad2f8683cb0155fb4007bc66dfef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-gemma-2-9B/17cda965-9f4b-411c-977f-1fe3238f527f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT1-gemma-2-9B/1762652580.619083", - "retrieved_timestamp": "1762652580.6190841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT1-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7946703635243377 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6108745950756924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22356495468277945 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43222916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4357546542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen1-gemma-2-9B/e6c0f96c-6189-4ed1-bf68-e762249170e7.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen1-gemma-2-9B/e6c0f96c-6189-4ed1-bf68-e762249170e7.json deleted file mode 100644 index c7da51e4681cc790edf66e702a54071578fb7461..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen1-gemma-2-9B/e6c0f96c-6189-4ed1-bf68-e762249170e7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen1-gemma-2-9B/1762652580.619495", - "retrieved_timestamp": "1762652580.619499", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT2-Gen1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-Gen1-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7855778224001206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6100802027920743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42432291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4376662234042553 - } - } - ], 
- "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen2-gemma-2-9B/556a83e2-9b7c-432e-99d5-804da880dfc6.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen2-gemma-2-9B/556a83e2-9b7c-432e-99d5-804da880dfc6.json deleted file mode 100644 index 11b5d77685bcff6186163f76731cf0cc5bd52090..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen2-gemma-2-9B/556a83e2-9b7c-432e-99d5-804da880dfc6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen2-gemma-2-9B/1762652580.6198761", - "retrieved_timestamp": "1762652580.619877", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT2-Gen2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-Gen2-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7889001183526376 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6092917531936446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42702083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43882978723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen3-gemma-2-9B/1aa85069-5409-4c32-91d5-1f417be4e465.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen3-gemma-2-9B/1aa85069-5409-4c32-91d5-1f417be4e465.json deleted file mode 100644 index 825d5351f9152669313b00a45b7afaf616baa667..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen3-gemma-2-9B/1aa85069-5409-4c32-91d5-1f417be4e465.json +++ /dev/null @@ -1,107 
+0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen3-gemma-2-9B/1762652580.620111", - "retrieved_timestamp": "1762652580.620112", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT2-Gen3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-Gen3-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7810066179958066 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6104772065373926 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2107250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4230833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43741688829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen4-gemma-2-9B/eb55e4d5-dde4-4349-b8aa-9297604cedf0.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen4-gemma-2-9B/eb55e4d5-dde4-4349-b8aa-9297604cedf0.json deleted file mode 100644 index eb33825216f108c759949ac23058b90e703e5b7e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen4-gemma-2-9B/eb55e4d5-dde4-4349-b8aa-9297604cedf0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen4-gemma-2-9B/1762652580.620331", - "retrieved_timestamp": "1762652580.620331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT2-Gen4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - 
"id": "zelk12/MT2-Gen4-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7895993741051521 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.609655139201776 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22356495468277945 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41254166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43209773936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen5-gemma-2-9B/3f7eb2b4-8dfb-4bf5-a462-0c11ccbae935.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen5-gemma-2-9B/3f7eb2b4-8dfb-4bf5-a462-0c11ccbae935.json deleted file mode 100644 index fbd77c18bc6398d5854d8c712bd20ae1aac20203..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen5-gemma-2-9B/3f7eb2b4-8dfb-4bf5-a462-0c11ccbae935.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen5-gemma-2-9B/1762652580.6205592", - "retrieved_timestamp": "1762652580.6205592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT2-Gen5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-Gen5-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7749116787900548 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6063933817527739 - } - }, - { - "evaluation_name": "MATH Level 5", 
- "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2107250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42441666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43018617021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen6-gemma-2-9B/35e1f76a-96d6-42af-a51b-b1b453536723.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen6-gemma-2-9B/35e1f76a-96d6-42af-a51b-b1b453536723.json deleted file mode 100644 index a54c87074ef2cb5cc72e7b20a9f30f5e885ea28a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen6-gemma-2-9B/35e1f76a-96d6-42af-a51b-b1b453536723.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen6-gemma-2-9B/1762652580.620769", - "retrieved_timestamp": "1762652580.620769", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT2-Gen6-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-Gen6-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16641289556155447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.595964957637105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41371874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42096077127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen7-gemma-2-9B/4b9e66cf-0ddb-4878-8800-2bc05dec750a.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen7-gemma-2-9B/4b9e66cf-0ddb-4878-8800-2bc05dec750a.json deleted file mode 100644 index 9a38a2b480c041244aaf95348c60a18683bb77bd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen7-gemma-2-9B/4b9e66cf-0ddb-4878-8800-2bc05dec750a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen7-gemma-2-9B/1762652580.621203", - "retrieved_timestamp": "1762652580.621205", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT2-Gen7-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-Gen7-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17615482475387528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6078922830693557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42032291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4311003989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Max-Merge_02012025163610-gemma-2-9B/2144960d-f674-45bd-9509-3cf711dc697b.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Max-Merge_02012025163610-gemma-2-9B/2144960d-f674-45bd-9509-3cf711dc697b.json deleted file mode 100644 index 142b173c58bb016efd5739c9b9b8e83a01ebf8a3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Max-Merge_02012025163610-gemma-2-9B/2144960d-f674-45bd-9509-3cf711dc697b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT2-Max-Merge_02012025163610-gemma-2-9B/1762652580.6214652", - "retrieved_timestamp": "1762652580.6214678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7901490268044344 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6108461203950706 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42283333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4390791223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-gemma-2-9B/0644b140-506f-4c7a-ba59-50ab48fad799.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-gemma-2-9B/0644b140-506f-4c7a-ba59-50ab48fad799.json deleted file mode 100644 index 4541304e5948ecc7c038683132ceaf5bd3923f1b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-gemma-2-9B/0644b140-506f-4c7a-ba59-50ab48fad799.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/zelk12_MT2-gemma-2-9B/1762652580.6217349", - "retrieved_timestamp": "1762652580.621736", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT2-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7885754243185858 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.611511004530543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42165625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43683510638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen1-gemma-2-9B/1964f25a-d5b2-467a-a30d-9338082bdcfb.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen1-gemma-2-9B/1964f25a-d5b2-467a-a30d-9338082bdcfb.json deleted file mode 100644 index 13c215397f3584c93908bfd6a228f0e9f6567363..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen1-gemma-2-9B/1964f25a-d5b2-467a-a30d-9338082bdcfb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen1-gemma-2-9B/1762652580.6219652", - "retrieved_timestamp": "1762652580.6219661", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT3-Gen1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Gen1-gemma-2-9B" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7837792612490415 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6106760932030332 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41511458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43267952127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen2-gemma-2-9B/55315256-9b4d-4dbd-bc53-7ec384e0fdca.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen2-gemma-2-9B/55315256-9b4d-4dbd-bc53-7ec384e0fdca.json deleted file mode 100644 index f1c6bae89fa84da136ce7d423a2b1c2138d0a949..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen2-gemma-2-9B/55315256-9b4d-4dbd-bc53-7ec384e0fdca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen2-gemma-2-9B/1762652580.622196", - "retrieved_timestamp": "1762652580.622197", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT3-Gen2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Gen2-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7843289139483238 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6091473194676166 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH 
Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22356495468277945 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41111458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43326130319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen3-gemma-2-9B/71710546-99cb-4180-9454-1e77696fccf3.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen3-gemma-2-9B/71710546-99cb-4180-9454-1e77696fccf3.json deleted file mode 100644 index 12159a64f6971ab14dfb1f0000c16a6b103abfbc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen3-gemma-2-9B/71710546-99cb-4180-9454-1e77696fccf3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen3-gemma-2-9B/1762652580.622438", - "retrieved_timestamp": "1762652580.622439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT3-Gen3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Gen3-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7856276900845313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6088892215987798 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21525679758308158 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.42575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4302692819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen4-gemma-2-9B/96b38b17-8c70-4ecf-beb5-8e6ed84942ac.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen4-gemma-2-9B/96b38b17-8c70-4ecf-beb5-8e6ed84942ac.json deleted file mode 100644 index c229f051a38c4c65dad9ab5456c675d24b4b8e65..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen4-gemma-2-9B/96b38b17-8c70-4ecf-beb5-8e6ed84942ac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen4-gemma-2-9B/1762652580.6226869", - "retrieved_timestamp": "1762652580.622689", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT3-Gen4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Gen4-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7737126380226687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6100843629460684 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20619335347432025 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4476354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen5-gemma-2-9B/53dc50c8-fa89-4d31-92d6-f8b02543e272.json 
b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen5-gemma-2-9B/53dc50c8-fa89-4d31-92d6-f8b02543e272.json deleted file mode 100644 index 479c08a33eb50b64449d67c491106b8088b32dc5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen5-gemma-2-9B/53dc50c8-fa89-4d31-92d6-f8b02543e272.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen5-gemma-2-9B/1762652580.622956", - "retrieved_timestamp": "1762652580.622956", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT3-Gen5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Gen5-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7990166092634211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6098615465467813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22658610271903323 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41911458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43168218085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen5-gemma-2-9B_v1/95fe9cce-c93d-47e3-a053-defe922abefa.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen5-gemma-2-9B_v1/95fe9cce-c93d-47e3-a053-defe922abefa.json deleted file mode 100644 index 27a4fac29f707facfff85cfa3255ba265cbbcdda..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen5-gemma-2-9B_v1/95fe9cce-c93d-47e3-a053-defe922abefa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen5-gemma-2-9B_v1/1762652580.623179", - "retrieved_timestamp": "1762652580.623179", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT3-Gen5-gemma-2-9B_v1", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Gen5-gemma-2-9B_v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7996161296471141 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6113330718661595 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22280966767371602 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4203854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4359208776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen6-gemma-2-9B/9f093c1a-eabc-4ee3-9e43-9ac0bc3afa08.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen6-gemma-2-9B/9f093c1a-eabc-4ee3-9e43-9ac0bc3afa08.json deleted file mode 100644 index 8904c0b51c21c158b184837c9225e0cddb37cf65..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen6-gemma-2-9B/9f093c1a-eabc-4ee3-9e43-9ac0bc3afa08.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen6-gemma-2-9B/1762652580.623395", - "retrieved_timestamp": "1762652580.623395", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT3-Gen6-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Gen6-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17615482475387528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6020072592121909 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08836858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4125729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41023936170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Max-Merge_02012025163610-gemma-2-9B/42e21a24-7c3c-4e65-ad6e-0b18f6c048eb.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Max-Merge_02012025163610-gemma-2-9B/42e21a24-7c3c-4e65-ad6e-0b18f6c048eb.json deleted file mode 100644 index c97bd6912f411ca7a19fe50f4172341baf8f32da..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Max-Merge_02012025163610-gemma-2-9B/42e21a24-7c3c-4e65-ad6e-0b18f6c048eb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-Max-Merge_02012025163610-gemma-2-9B/1762652580.623601", - "retrieved_timestamp": "1762652580.623602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17615482475387528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6123461203950705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": 
"Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42546875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4389128989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-gemma-2-9B/0b8f178b-9980-4250-bc82-66facb367eb8.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-gemma-2-9B/0b8f178b-9980-4250-bc82-66facb367eb8.json deleted file mode 100644 index 3e69d166cbb64c38302d56c07bf6beb68e0e2fdf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-gemma-2-9B/0b8f178b-9980-4250-bc82-66facb367eb8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT3-gemma-2-9B/1762652580.623819", - "retrieved_timestamp": "1762652580.62382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT3-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7786085364610345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.61307842026088 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21676737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4242916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43267952127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen1-gemma-2-9B/6e5b6be6-cc1d-4a03-8e5e-eeede4ee4298.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen1-gemma-2-9B/6e5b6be6-cc1d-4a03-8e5e-eeede4ee4298.json deleted file mode 100644 index 8b337d546e3b32a817cef7054b3d82d4e7f75a1f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen1-gemma-2-9B/6e5b6be6-cc1d-4a03-8e5e-eeede4ee4298.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen1-gemma-2-9B/1762652580.624031", - "retrieved_timestamp": "1762652580.624032", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT4-Gen1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT4-Gen1-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7894996387363307 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6093827996028333 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21978851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43222916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4389128989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen2-gemma-2-9B/e7f0b28a-32c6-4faf-9cb4-c2ee4a075135.json 
b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen2-gemma-2-9B/e7f0b28a-32c6-4faf-9cb4-c2ee4a075135.json deleted file mode 100644 index 204c5eadeabfa733f6b83f863a824c582b913fa1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen2-gemma-2-9B/e7f0b28a-32c6-4faf-9cb4-c2ee4a075135.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen2-gemma-2-9B/1762652580.6242292", - "retrieved_timestamp": "1762652580.62423", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT4-Gen2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT4-Gen2-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8050616807847621 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6108348543973539 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42565625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4367519946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen3-gemma-2-9B/b84ca7e1-4746-449a-841f-fcfd71774104.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen3-gemma-2-9B/b84ca7e1-4746-449a-841f-fcfd71774104.json deleted file mode 100644 index 9f3bd3e1301fa77bb34e603f39d399b0f6c6d485..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen3-gemma-2-9B/b84ca7e1-4746-449a-841f-fcfd71774104.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen3-gemma-2-9B/1762652580.624489", - "retrieved_timestamp": "1762652580.62449", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT4-Gen3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT4-Gen3-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7840540875986826 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6087112889343964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42432291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380817819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen4-gemma-2-9B/b38dc953-12fb-41aa-a887-d9a30ff1799a.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen4-gemma-2-9B/b38dc953-12fb-41aa-a887-d9a30ff1799a.json deleted file mode 100644 index 638b932fcb87ff65588e4077aebda1fdbb67b489..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen4-gemma-2-9B/b38dc953-12fb-41aa-a887-d9a30ff1799a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen4-gemma-2-9B/1762652580.6246998", - "retrieved_timestamp": "1762652580.624701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT4-Gen4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT4-Gen4-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.7874262512356104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6076031496231499 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42435416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4323470744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen5-gemma-2-9B/4a35f213-f9b7-40c5-b164-722f6b4ee933.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen5-gemma-2-9B/4a35f213-f9b7-40c5-b164-722f6b4ee933.json deleted file mode 100644 index 01a571449b23663ce9d9724bd9f2bda0fb57d458..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen5-gemma-2-9B/4a35f213-f9b7-40c5-b164-722f6b4ee933.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen5-gemma-2-9B/1762652580.6249092", - "retrieved_timestamp": "1762652580.62491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT4-Gen5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT4-Gen5-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7788833628106757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6106664051994928 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22658610271903323 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42683333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43841422872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Max-Merge_02012025163610-gemma-2-9B/ae4224f6-36e8-48e2-a0bf-a79299c365ad.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Max-Merge_02012025163610-gemma-2-9B/ae4224f6-36e8-48e2-a0bf-a79299c365ad.json deleted file mode 100644 index 17a26f90b30969cf5d0e2dfb99be53b19f6dd0d9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Max-Merge_02012025163610-gemma-2-9B/ae4224f6-36e8-48e2-a0bf-a79299c365ad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT4-Max-Merge_02012025163610-gemma-2-9B/1762652580.625107", - "retrieved_timestamp": "1762652580.625107", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1770790391716202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6120127870617372 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4228020833333333 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4390791223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-gemma-2-9B/a312ee46-fd2f-4a0d-a778-7e235910a147.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-gemma-2-9B/a312ee46-fd2f-4a0d-a778-7e235910a147.json deleted file mode 100644 index 3bed200d498786bac68919561587ff0472bfb6b0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-gemma-2-9B/a312ee46-fd2f-4a0d-a778-7e235910a147.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT4-gemma-2-9B/1762652580.62533", - "retrieved_timestamp": "1762652580.625331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT4-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7761605872418517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.607313601341302 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43092708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43658577127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen1-gemma-2-9B/b311d3f4-6eda-4053-91d2-416c4d796c6d.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen1-gemma-2-9B/b311d3f4-6eda-4053-91d2-416c4d796c6d.json deleted file mode 100644 index 
e8cc60221a8730987113831e512d3af82dcc3aca..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen1-gemma-2-9B/b311d3f4-6eda-4053-91d2-416c4d796c6d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen1-gemma-2-9B/1762652580.625538", - "retrieved_timestamp": "1762652580.625539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT5-Gen1-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT5-Gen1-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7831298731809377 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6110476837383056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4203854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43683510638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen2-gemma-2-9B/d59d00da-e88f-4d1a-9c47-538020ae0114.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen2-gemma-2-9B/d59d00da-e88f-4d1a-9c47-538020ae0114.json deleted file mode 100644 index d5d80e8e669c265c0fb7d2a4e909f89fb1b09411..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen2-gemma-2-9B/d59d00da-e88f-4d1a-9c47-538020ae0114.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen2-gemma-2-9B/1762652580.625738", - "retrieved_timestamp": "1762652580.625739", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT5-Gen2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT5-Gen2-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7962439660101863 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.610541261742359 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41629166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4379155585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen3-gemma-2-9B/1ff959c7-3477-40e5-8460-971337adc788.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen3-gemma-2-9B/1ff959c7-3477-40e5-8460-971337adc788.json deleted file mode 100644 index 585f25eb300ef1bfb106ebd8fda0c6b1a57183de..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen3-gemma-2-9B/1ff959c7-3477-40e5-8460-971337adc788.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen3-gemma-2-9B/1762652580.625941", - "retrieved_timestamp": "1762652580.625942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT5-Gen3-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT5-Gen3-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7825303527972447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6090494662695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21676737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42305208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen4-gemma-2-9B/6cbd7c31-df0a-4920-9c23-be53f107698e.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen4-gemma-2-9B/6cbd7c31-df0a-4920-9c23-be53f107698e.json deleted file mode 100644 index b6a3c30ec2b6787f2a02d9131a9e061abaf7598b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen4-gemma-2-9B/6cbd7c31-df0a-4920-9c23-be53f107698e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen4-gemma-2-9B/1762652580.62615", - "retrieved_timestamp": "1762652580.6261508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT5-Gen4-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT5-Gen4-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7834545672149895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6131056160021203 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42283333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4396609042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen5-gemma-2-9B/b4ca4df6-2631-4ba3-bb55-8eadec5dd348.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen5-gemma-2-9B/b4ca4df6-2631-4ba3-bb55-8eadec5dd348.json deleted file mode 100644 index cfbcae7705facc51b97b209c48d374edb5357991..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen5-gemma-2-9B/b4ca4df6-2631-4ba3-bb55-8eadec5dd348.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen5-gemma-2-9B/1762652580.6263602", - "retrieved_timestamp": "1762652580.6263611", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT5-Gen5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT5-Gen5-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7947202312087482 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6111664051994928 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2258308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34815436241610737 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41911458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43292885638297873 - } - } - 
], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Max-Merge_02012025163610-gemma-2-9B/6737b327-bd1c-4eee-a461-af685edcd7b5.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Max-Merge_02012025163610-gemma-2-9B/6737b327-bd1c-4eee-a461-af685edcd7b5.json deleted file mode 100644 index 6abf38344157ae3cdbbc723b23ac92c183bdd71e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Max-Merge_02012025163610-gemma-2-9B/6737b327-bd1c-4eee-a461-af685edcd7b5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT5-Max-Merge_02012025163610-gemma-2-9B/1762652580.62657", - "retrieved_timestamp": "1762652580.62657", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17615482475387528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6126794537284038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4227708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43899601063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-gemma-2-9B/dd306da8-60aa-4022-8d04-1942fd19bc0b.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-gemma-2-9B/dd306da8-60aa-4022-8d04-1942fd19bc0b.json deleted file mode 100644 index d8a5be6e55c51c72e9d6a78bbc84cd85d9637882..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-gemma-2-9B/dd306da8-60aa-4022-8d04-1942fd19bc0b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MT5-gemma-2-9B/1762652580.6267788", - "retrieved_timestamp": "1762652580.6267798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MT5-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MT5-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8047868544351211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6112225549321132 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2258308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4203854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4366688829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MTM-Merge-gemma-2-9B/e0354dac-3ad8-4342-92a9-be0182051cac.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MTM-Merge-gemma-2-9B/e0354dac-3ad8-4342-92a9-be0182051cac.json deleted file mode 100644 index e95e65d61467bc8f50155ed711afb1dc34d71ee5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MTM-Merge-gemma-2-9B/e0354dac-3ad8-4342-92a9-be0182051cac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MTM-Merge-gemma-2-9B/1762652580.626984", - "retrieved_timestamp": "1762652580.626985", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - 
"model_info": { - "name": "zelk12/MTM-Merge-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MTM-Merge-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7798075772284205 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6133348543973538 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4267708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43882978723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MTMaMe-Merge_02012025163610-gemma-2-9B/b1a8ede3-2f27-4825-a413-e1772743b7c6.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MTMaMe-Merge_02012025163610-gemma-2-9B/b1a8ede3-2f27-4825-a413-e1772743b7c6.json deleted file mode 100644 index 98933922bb9611c203075e28a2f1b3af63622ea1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MTMaMe-Merge_02012025163610-gemma-2-9B/b1a8ede3-2f27-4825-a413-e1772743b7c6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_MTMaMe-Merge_02012025163610-gemma-2-9B/1762652580.627192", - "retrieved_timestamp": "1762652580.627192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17860277397305815 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6116794537284039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42410416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43816489361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4DMv1t0.25-gemma-2-9B/522e1145-3f25-4b5d-9b6a-7ad0047b2da5.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4DMv1t0.25-gemma-2-9B/522e1145-3f25-4b5d-9b6a-7ad0047b2da5.json deleted file mode 100644 index 0a77c0acf5ba5313b110426b40c8320340d64575..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4DMv1t0.25-gemma-2-9B/522e1145-3f25-4b5d-9b6a-7ad0047b2da5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_Rv0.4DMv1t0.25-gemma-2-9B/1762652580.627404", - "retrieved_timestamp": "1762652580.627404", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/Rv0.4DMv1t0.25-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/Rv0.4DMv1t0.25-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7496575752337131 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6069712638522043 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2258308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43092708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44007646276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/64790745-5edc-49d9-8111-822d54518b58.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/64790745-5edc-49d9-8111-822d54518b58.json deleted file mode 100644 index 7c86ae516ed2ab35f56294ef89fc7eb36c500f21..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/64790745-5edc-49d9-8111-822d54518b58.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/1762652580.627618", - "retrieved_timestamp": "1762652580.627619", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7646200968984517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6097862253440982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20694864048338368 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43467420212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4MT4g2-gemma-2-9B/7e232332-cf13-4127-be18-1311921931e6.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4MT4g2-gemma-2-9B/7e232332-cf13-4127-be18-1311921931e6.json deleted file mode 100644 index 5b83fe416ebb20cef2bda3afd8af40227488da1a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4MT4g2-gemma-2-9B/7e232332-cf13-4127-be18-1311921931e6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_Rv0.4MT4g2-gemma-2-9B/1762652580.627839", - "retrieved_timestamp": "1762652580.62784", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/Rv0.4MT4g2-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/Rv0.4MT4g2-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7320221456845614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.604119644415618 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4230833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44173869680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_T31122024203920-gemma-2-9B/f1312aef-339c-487a-b0fa-1bf4a77f0910.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_T31122024203920-gemma-2-9B/f1312aef-339c-487a-b0fa-1bf4a77f0910.json deleted file mode 100644 index 
85226854a9b88527647c89b72890b9619f33d583..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_T31122024203920-gemma-2-9B/f1312aef-339c-487a-b0fa-1bf4a77f0910.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_T31122024203920-gemma-2-9B/1762652580.628056", - "retrieved_timestamp": "1762652580.628057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/T31122024203920-gemma-2-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/T31122024203920-gemma-2-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7676176988169169 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6095634089448112 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4321979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.437250664893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_Test01012025155054t0.5_gemma-2/73f07833-1d35-484f-8fe3-57f4c27e1277.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_Test01012025155054t0.5_gemma-2/73f07833-1d35-484f-8fe3-57f4c27e1277.json deleted file mode 100644 index b85e9826519a5e54182ebe46e3631542513aba3e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_Test01012025155054t0.5_gemma-2/73f07833-1d35-484f-8fe3-57f4c27e1277.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_Test01012025155054t0.5_gemma-2/1762652580.628514", - "retrieved_timestamp": "1762652580.628514", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open 
LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/Test01012025155054t0.5_gemma-2", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/Test01012025155054t0.5_gemma-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555229014570229 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28295044895258115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36702083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10904255319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 3.817 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_gemma-2-S2MTM-9B/e0eb1bbf-923b-4bee-8390-288c21607e0e.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_gemma-2-S2MTM-9B/e0eb1bbf-923b-4bee-8390-288c21607e0e.json deleted file mode 100644 index f85ad77edfb0163df1f60922be5c5339fa9e4d55..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_gemma-2-S2MTM-9B/e0eb1bbf-923b-4bee-8390-288c21607e0e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_gemma-2-S2MTM-9B/1762652580.628712", - "retrieved_timestamp": "1762652580.628713", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/gemma-2-S2MTM-9B", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/gemma-2-S2MTM-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7822555264476034 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6060836790982922 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20468277945619334 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42184375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4296875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/b9ce6ed3-132a-44ed-9efc-dbfcc83d6799.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/b9ce6ed3-132a-44ed-9efc-dbfcc83d6799.json deleted file mode 100644 index 8a8858e6195ee2fa4a9b453da2dc222551575f10..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/b9ce6ed3-132a-44ed-9efc-dbfcc83d6799.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/1762652580.630025", - "retrieved_timestamp": "1762652580.630029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7706651684197928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6075432245295168 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - 
{ - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43226041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4399933510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/a2b9a953-31e2-4a6f-8005-993e1133246e.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/a2b9a953-31e2-4a6f-8005-993e1133246e.json deleted file mode 100644 index 8e7fd6e65872296a6758e184799b058c8fd7037b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/a2b9a953-31e2-4a6f-8005-993e1133246e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/1762652580.630381", - "retrieved_timestamp": "1762652580.630382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7208063493752133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5995203934792884 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3498322147651007 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951145833333333 - } - 
}, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4140625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1/6850eb56-9f2c-4d4f-a82a-29e24b81b8b3.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1/6850eb56-9f2c-4d4f-a82a-29e24b81b8b3.json deleted file mode 100644 index 99d5c29b13c8f08f91408acea20bed447b2322fb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1/6850eb56-9f2c-4d4f-a82a-29e24b81b8b3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1/1762652580.628911", - "retrieved_timestamp": "1762652580.6289122", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7648949232480928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6074511952177571 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2280966767371601 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3498322147651007 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41362499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43209773936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.2/7f429355-b60b-4298-8eb0-a072a80898d7.json 
b/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.2/7f429355-b60b-4298-8eb0-a072a80898d7.json deleted file mode 100644 index 178bd494c08b05363f4268119f9e4e4af7ef9c11..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.2/7f429355-b60b-4298-8eb0-a072a80898d7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.2/1762652580.6306539", - "retrieved_timestamp": "1762652580.6306539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.759999024809727 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6066260664115647 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22280966767371602 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34815436241610737 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4109583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43226396276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/774a3b0c-acae-4ad2-a2a6-42c30e1db7c0.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/774a3b0c-acae-4ad2-a2a6-42c30e1db7c0.json deleted file mode 100644 index 217203e5fbefed2dd4a4df6480be7d2c97758580..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/774a3b0c-acae-4ad2-a2a6-42c30e1db7c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/1762652580.630864", 
- "retrieved_timestamp": "1762652580.6308649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7615227596111651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6098779556010631 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20996978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43102083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4315159574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ifable-9B-v0.1/e8502d8d-87bd-444c-b41b-7f8d4eb15b29.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ifable-9B-v0.1/e8502d8d-87bd-444c-b41b-7f8d4eb15b29.json deleted file mode 100644 index b3e5918a4c0ad06f07a6a0c25b9fd923f36c5427..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ifable-9B-v0.1/e8502d8d-87bd-444c-b41b-7f8d4eb15b29.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ifable-9B-v0.1/1762652580.6310751", - "retrieved_timestamp": "1762652580.631076", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/recoilme-gemma-2-Ifable-9B-v0.1", - "developer": "google", - "inference_platform": "unknown", - "id": 
"zelk12/recoilme-gemma-2-Ifable-9B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7943955371746965 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6064399292200404 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42022916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4323470744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/735bed66-1e83-4647-b730-14f0d571d597.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/735bed66-1e83-4647-b730-14f0d571d597.json deleted file mode 100644 index 79b952a8e42d601fee9920d9a3348aba70391377..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/735bed66-1e83-4647-b730-14f0d571d597.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/1762652580.631496", - "retrieved_timestamp": "1762652580.631499", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1", - "developer": "google", - "inference_platform": "unknown", - "id": "zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.744536718130117 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.597759349920723 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42946875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41805186170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/goulue5/goulue5_merging_LLM/a7fb7d77-93c3-41c8-a85a-692953dcd2c6.json b/leaderboard_data/HFOpenLLMv2/goulue5/goulue5_merging_LLM/a7fb7d77-93c3-41c8-a85a-692953dcd2c6.json deleted file mode 100644 index 0f0b83177188d2b05d04f09f30ed014011216677..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/goulue5/goulue5_merging_LLM/a7fb7d77-93c3-41c8-a85a-692953dcd2c6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/goulue5_merging_LLM/1762652580.1806688", - "retrieved_timestamp": "1762652580.18067", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "goulue5/merging_LLM", - "developer": "goulue5", - "inference_platform": "unknown", - "id": "goulue5/merging_LLM" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32326006108237254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4216498611590102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43328125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29579454787234044 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/gradientai/gradientai_Llama-3-8B-Instruct-Gradient-1048k/79d366fc-e21c-4e5e-bb94-8d221d9df715.json b/leaderboard_data/HFOpenLLMv2/gradientai/gradientai_Llama-3-8B-Instruct-Gradient-1048k/79d366fc-e21c-4e5e-bb94-8d221d9df715.json deleted file mode 100644 index 5f7b0332686c1a79bec184822fed25a8f429fd12..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/gradientai/gradientai_Llama-3-8B-Instruct-Gradient-1048k/79d366fc-e21c-4e5e-bb94-8d221d9df715.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gradientai_Llama-3-8B-Instruct-Gradient-1048k/1762652580.181334", - "retrieved_timestamp": "1762652580.181335", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gradientai/Llama-3-8B-Instruct-Gradient-1048k", - "developer": "gradientai", - "inference_platform": "unknown", - "id": "gradientai/Llama-3-8B-Instruct-Gradient-1048k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4455588948434598 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4345903107069573 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29404920212765956 - } - } 
- ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/6b615d1d-7dab-4414-88a2-72fff1b5fce7.json b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/6b615d1d-7dab-4414-88a2-72fff1b5fce7.json deleted file mode 100644 index 2f35b0662ac165a690964cb966b7054a5bbdc9fd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/6b615d1d-7dab-4414-88a2-72fff1b5fce7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/1762652580.1827798", - "retrieved_timestamp": "1762652580.182781", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42712447417297217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4961694535006833 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40432291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3625332446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/251c7560-4672-44a6-82df-2b8ce9a99a5e.json b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/251c7560-4672-44a6-82df-2b8ce9a99a5e.json deleted file mode 100644 index 
0db18b7a14925f298ef0b17168cd168275c5abc5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/251c7560-4672-44a6-82df-2b8ce9a99a5e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/1762652580.183053", - "retrieved_timestamp": "1762652580.183053", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6805897241541332 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5021734091176594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38851041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684341755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3.1-8B-Instruct-abliterated_via_adapter/377105ce-c655-47fe-a565-71a4de8c3683.json b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3.1-8B-Instruct-abliterated_via_adapter/377105ce-c655-47fe-a565-71a4de8c3683.json deleted file mode 100644 index 581c422f97108f05feaa103a7b29ff7e6755a394..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3.1-8B-Instruct-abliterated_via_adapter/377105ce-c655-47fe-a565-71a4de8c3683.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Llama-3.1-8B-Instruct-abliterated_via_adapter/1762652580.183267", - "retrieved_timestamp": "1762652580.183268", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48695018107510296 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.510526564708187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40103125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3651097074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3.1-Bonsaikraft-8B-Instruct/5f15d683-bae4-4888-8d1c-352aac802fbe.json b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3.1-Bonsaikraft-8B-Instruct/5f15d683-bae4-4888-8d1c-352aac802fbe.json deleted file mode 100644 index e03ecc49732597803e63221a8de466748d58e475..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3.1-Bonsaikraft-8B-Instruct/5f15d683-bae4-4888-8d1c-352aac802fbe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Llama-3.1-Bonsaikraft-8B-Instruct/1762652580.1834722", - "retrieved_timestamp": "1762652580.1834729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Llama-3.1-Bonsaikraft-8B-Instruct", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Llama-3.1-Bonsaikraft-8B-Instruct" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42500121898784116 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5286855891530357 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4235104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3764128989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v2-12B/2cf86f7c-a9a8-48d0-bc10-e8a1f654092c.json b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v2-12B/2cf86f7c-a9a8-48d0-bc10-e8a1f654092c.json deleted file mode 100644 index dd39331e50af85e5e01eb6fd3663661451ab8d06..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v2-12B/2cf86f7c-a9a8-48d0-bc10-e8a1f654092c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v2-12B/1762652580.184318", - "retrieved_timestamp": "1762652580.184319", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Magnolia-v2-12B", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Magnolia-v2-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3506119318962575 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5290279354217235 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match 
on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41712499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3601230053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v3-12B/68faa5a3-82ae-462d-adad-505134024710.json b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v3-12B/68faa5a3-82ae-462d-adad-505134024710.json deleted file mode 100644 index 77fb9812bd46c9969534376b51c4892b1caf7e3e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v3-12B/68faa5a3-82ae-462d-adad-505134024710.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v3-12B/1762652580.184813", - "retrieved_timestamp": "1762652580.184814", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Magnolia-v3-12B", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Magnolia-v3-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39649906692021614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5326669270363916 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1351963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4183958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615359042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v4-12B/a48116ed-d4bf-4f06-94aa-2ef8364bd8d2.json b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v4-12B/a48116ed-d4bf-4f06-94aa-2ef8364bd8d2.json deleted file mode 100644 index 002dfe6acff5d8930da3ad7272222c3f2b0f2168..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v4-12B/a48116ed-d4bf-4f06-94aa-2ef8364bd8d2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v4-12B/1762652580.18525", - "retrieved_timestamp": "1762652580.185251", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Magnolia-v4-12B", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Magnolia-v4-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34179421712168156 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5430894084668724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42112499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3671875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v5a-12B/ff64dcc7-9646-4c53-8b1e-68b62a025574.json 
b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v5a-12B/ff64dcc7-9646-4c53-8b1e-68b62a025574.json deleted file mode 100644 index 8b344aee1d50fe8861f560fd51603a29e687dd35..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v5a-12B/ff64dcc7-9646-4c53-8b1e-68b62a025574.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v5a-12B/1762652580.185457", - "retrieved_timestamp": "1762652580.185458", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Magnolia-v5a-12B", - "developer": "grimjim", - "inference_platform": "unknown", - "id": "grimjim/Magnolia-v5a-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41136185321613317 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5311764105029141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13746223564954682 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3601230053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/gupta-tanish/gupta-tanish_llama-7b-dpo-baseline/1b962cb9-8754-40ab-b41a-b7cdf1fa3de1.json b/leaderboard_data/HFOpenLLMv2/gupta-tanish/gupta-tanish_llama-7b-dpo-baseline/1b962cb9-8754-40ab-b41a-b7cdf1fa3de1.json deleted file mode 100644 index 8151178883dcca321ed102de6b1ec63efc235da4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/gupta-tanish/gupta-tanish_llama-7b-dpo-baseline/1b962cb9-8754-40ab-b41a-b7cdf1fa3de1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gupta-tanish_llama-7b-dpo-baseline/1762652580.1871748", - "retrieved_timestamp": "1762652580.1871748", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gupta-tanish/llama-7b-dpo-baseline", - "developer": "gupta-tanish", - "inference_platform": "unknown", - "id": "gupta-tanish/llama-7b-dpo-baseline" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26930433472076315 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3896894398264714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20279255319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube-1.8b-chat/ac8f78b5-a9e1-4e17-a1e7-8a7b8dc22a8d.json b/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube-1.8b-chat/ac8f78b5-a9e1-4e17-a1e7-8a7b8dc22a8d.json deleted file mode 100644 index 5a85215d5526e9f093fd802a40182a52ecfae19a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube-1.8b-chat/ac8f78b5-a9e1-4e17-a1e7-8a7b8dc22a8d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube-1.8b-chat/1762652580.188648", - "retrieved_timestamp": "1762652580.188649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "h2oai/h2o-danube-1.8b-chat", - "developer": "h2oai", - "inference_platform": "unknown", - "id": "h2oai/h2o-danube-1.8b-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2198699450790569 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3219657593234448 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3988645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13139960106382978 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 1.831 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-4b-base/3878bb0d-753f-465a-a8c1-8408f8f5bfcf.json b/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-4b-base/3878bb0d-753f-465a-a8c1-8408f8f5bfcf.json deleted file mode 100644 index 7ceee720fabbe8bcda7c2504cabca26793248933..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-4b-base/3878bb0d-753f-465a-a8c1-8408f8f5bfcf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3-4b-base/1762652580.18891", - "retrieved_timestamp": "1762652580.1889112", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "h2oai/h2o-danube3-4b-base", - "developer": "h2oai", - "inference_platform": "unknown", - "id": "h2oai/h2o-danube3-4b-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23380851695722904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3599083951265592 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37781250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2109375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.962 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-4b-chat/d3df3cb7-5e79-49e5-9ed1-1e2771318915.json b/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-4b-chat/d3df3cb7-5e79-49e5-9ed1-1e2771318915.json deleted file mode 100644 index 9f9d53492c9cde7d5efa9e1ccb5cb14d9c95f85b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-4b-chat/d3df3cb7-5e79-49e5-9ed1-1e2771318915.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3-4b-chat/1762652580.1891232", - "retrieved_timestamp": "1762652580.189124", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "h2oai/h2o-danube3-4b-chat", - "developer": "h2oai", - "inference_platform": "unknown", - "id": "h2oai/h2o-danube3-4b-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3628771659197596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3466170643135169 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22282247340425532 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.962 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-500m-chat/c917765b-a4b4-4e5d-9c11-eed791349daf.json b/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-500m-chat/c917765b-a4b4-4e5d-9c11-eed791349daf.json deleted file mode 100644 index 71977428b9b226b9b96964fe153c3fd795c878a7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-500m-chat/c917765b-a4b4-4e5d-9c11-eed791349daf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3-500m-chat/1762652580.1893299", - "retrieved_timestamp": "1762652580.1893299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "h2oai/h2o-danube3-500m-chat", - "developer": "h2oai", - "inference_platform": "unknown", - "id": "h2oai/h2o-danube3-500m-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2207941594968018 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3034691168308313 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23070469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34339583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11436170212765957 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.514 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3.1-4b-chat/5f5d83bd-91e9-416b-b40d-506f3861ed3f.json b/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3.1-4b-chat/5f5d83bd-91e9-416b-b40d-506f3861ed3f.json deleted file mode 100644 index 
ac268c5f79994ba39b80232dfeacfb75c17bb62a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3.1-4b-chat/5f5d83bd-91e9-416b-b40d-506f3861ed3f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3.1-4b-chat/1762652580.189556", - "retrieved_timestamp": "1762652580.189557", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "h2oai/h2o-danube3.1-4b-chat", - "developer": "h2oai", - "inference_platform": "unknown", - "id": "h2oai/h2o-danube3.1-4b-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5021121734774842 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3608421638178268 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41015625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2718583776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.962 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_ALMA-13B-R/9446f216-e3d6-4fca-ae00-937b4a76e5bf.json b/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_ALMA-13B-R/9446f216-e3d6-4fca-ae00-937b4a76e5bf.json deleted file mode 100644 index 14c091ead753a8e219214287b8259e5656757855..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_ALMA-13B-R/9446f216-e3d6-4fca-ae00-937b4a76e5bf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/haoranxu_ALMA-13B-R/1762652580.189782", - "retrieved_timestamp": "1762652580.189783", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "haoranxu/ALMA-13B-R", - "developer": "haoranxu", - "inference_platform": "unknown", - "id": "haoranxu/ALMA-13B-R" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.003921816336210145 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.345656261205981 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35279166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18168218085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 13.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_Llama-3-Instruct-8B-CPO-SimPO/aa67ad0b-e469-4b49-a797-4542370a2e94.json b/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_Llama-3-Instruct-8B-CPO-SimPO/aa67ad0b-e469-4b49-a797-4542370a2e94.json deleted file mode 100644 index c837d275dd90fd5c47a2869c5a46aecf7074ac78..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_Llama-3-Instruct-8B-CPO-SimPO/aa67ad0b-e469-4b49-a797-4542370a2e94.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/haoranxu_Llama-3-Instruct-8B-CPO-SimPO/1762652580.190052", - "retrieved_timestamp": "1762652580.190052", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "haoranxu/Llama-3-Instruct-8B-CPO-SimPO", - "developer": "haoranxu", - "inference_platform": "unknown", - "id": "haoranxu/Llama-3-Instruct-8B-CPO-SimPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7046447869430887 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048301774821616 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3566666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686003989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_Llama-3-Instruct-8B-SimPO/39aa4e41-376f-4ee6-8925-8bf746a871a0.json b/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_Llama-3-Instruct-8B-SimPO/39aa4e41-376f-4ee6-8925-8bf746a871a0.json deleted file mode 100644 index d2caac9c630d7c9d878aaa36e576dead451c5e18..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_Llama-3-Instruct-8B-SimPO/39aa4e41-376f-4ee6-8925-8bf746a871a0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/haoranxu_Llama-3-Instruct-8B-SimPO/1762652580.190277", - "retrieved_timestamp": "1762652580.1902778", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "haoranxu/Llama-3-Instruct-8B-SimPO", - "developer": "haoranxu", - "inference_platform": "unknown", - "id": "haoranxu/Llama-3-Instruct-8B-SimPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7347449212533854 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49792360151415016 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35660416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37333776595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/hatemmahmoud/hatemmahmoud_qwen2.5-1.5b-sft-raft-grpo-hra-doc/7d3c185f-4b4f-4bdd-bac9-f4ba2410f40c.json b/leaderboard_data/HFOpenLLMv2/hatemmahmoud/hatemmahmoud_qwen2.5-1.5b-sft-raft-grpo-hra-doc/7d3c185f-4b4f-4bdd-bac9-f4ba2410f40c.json deleted file mode 100644 index 8d3d3a6e3e8519f99a9f08ed969cdbd3b45e6ece..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/hatemmahmoud/hatemmahmoud_qwen2.5-1.5b-sft-raft-grpo-hra-doc/7d3c185f-4b4f-4bdd-bac9-f4ba2410f40c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hatemmahmoud_qwen2.5-1.5b-sft-raft-grpo-hra-doc/1762652580.190489", - "retrieved_timestamp": "1762652580.190489", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc", - "developer": "hatemmahmoud", - "inference_platform": "unknown", - "id": "hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41958004760701606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4269926809768501 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36097916666666663 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.277593085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/hon9kon9ize/hon9kon9ize_CantoneseLLMChat-v0.5/6e87be06-ca0e-48a4-ae28-4a5794600117.json b/leaderboard_data/HFOpenLLMv2/hon9kon9ize/hon9kon9ize_CantoneseLLMChat-v0.5/6e87be06-ca0e-48a4-ae28-4a5794600117.json deleted file mode 100644 index 1f28be60e66417ad946ad0f82f25923a7147fc21..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/hon9kon9ize/hon9kon9ize_CantoneseLLMChat-v0.5/6e87be06-ca0e-48a4-ae28-4a5794600117.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hon9kon9ize_CantoneseLLMChat-v0.5/1762652580.190754", - "retrieved_timestamp": "1762652580.1907551", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hon9kon9ize/CantoneseLLMChat-v0.5", - "developer": "hon9kon9ize", - "inference_platform": "unknown", - "id": "hon9kon9ize/CantoneseLLMChat-v0.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3230849701015528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43452388803059244 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4706458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2504155585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.069 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/hon9kon9ize/hon9kon9ize_CantoneseLLMChat-v1.0-7B/cccf983e-e1b8-4f0f-b147-abccdea65548.json 
b/leaderboard_data/HFOpenLLMv2/hon9kon9ize/hon9kon9ize_CantoneseLLMChat-v1.0-7B/cccf983e-e1b8-4f0f-b147-abccdea65548.json deleted file mode 100644 index f24222406d570e42ee85090af6410a88f8d69c0c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/hon9kon9ize/hon9kon9ize_CantoneseLLMChat-v1.0-7B/cccf983e-e1b8-4f0f-b147-abccdea65548.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hon9kon9ize_CantoneseLLMChat-v1.0-7B/1762652580.191013", - "retrieved_timestamp": "1762652580.191013", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hon9kon9ize/CantoneseLLMChat-v1.0-7B", - "developer": "hon9kon9ize", - "inference_platform": "unknown", - "id": "hon9kon9ize/CantoneseLLMChat-v1.0-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44548353923146145 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4865734655539633 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2107250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3784906914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/hongbai12/hongbai12_li-0.4-pre/ab7dcb4c-3884-428f-b342-38034dd51b56.json b/leaderboard_data/HFOpenLLMv2/hongbai12/hongbai12_li-0.4-pre/ab7dcb4c-3884-428f-b342-38034dd51b56.json deleted file mode 100644 index 041421a78dcede8cebcb60562cc80a69c7e3bfd6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/hongbai12/hongbai12_li-0.4-pre/ab7dcb4c-3884-428f-b342-38034dd51b56.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hongbai12_li-0.4-pre/1762652580.191224", - "retrieved_timestamp": "1762652580.191225", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hongbai12/li-0.4-pre", - "developer": "hongbai12", - "inference_platform": "unknown", - "id": "hongbai12/li-0.4-pre" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5199725616918665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6298274927108823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4513020833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5014960106382979 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp1-10B/376d342c-669b-4c76-9e7b-d49566ac441d.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp1-10B/376d342c-669b-4c76-9e7b-d49566ac441d.json deleted file mode 100644 index 4fb45f91cb84040ea9ed47e588d426a9387c37c6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp1-10B/376d342c-669b-4c76-9e7b-d49566ac441d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_Falcon3Slerp1-10B/1762652580.19171", - "retrieved_timestamp": "1762652580.191711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/Falcon3Slerp1-10B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/Falcon3Slerp1-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy 
on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5694069513335727 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.616984966186231 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43176041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4401595744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp2-10B/bae0b772-8ae6-4fed-ae78-d6d83e560a95.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp2-10B/bae0b772-8ae6-4fed-ae78-d6d83e560a95.json deleted file mode 100644 index 0e843e80c447a4e57d7582922ade2ae8e1fe987e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp2-10B/bae0b772-8ae6-4fed-ae78-d6d83e560a95.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_Falcon3Slerp2-10B/1762652580.191951", - "retrieved_timestamp": "1762652580.191952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/Falcon3Slerp2-10B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/Falcon3Slerp2-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6117966994241945 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6164263500746402 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23187311178247735 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4095625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4369182180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp4-10B/d5466af4-2bef-4ce8-a659-9e05a5e674b6.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp4-10B/d5466af4-2bef-4ce8-a659-9e05a5e674b6.json deleted file mode 100644 index fd6f6c77c944772a38b64a5de6246db3b70d3284..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp4-10B/d5466af4-2bef-4ce8-a659-9e05a5e674b6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_Falcon3Slerp4-10B/1762652580.19215", - "retrieved_timestamp": "1762652580.192151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/Falcon3Slerp4-10B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/Falcon3Slerp4-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6072254950198805 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.611433776236228 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22885196374622357 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.40175 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp-3B/2db7aa3c-4969-40c0-b8c6-1ff5c953ba23.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp-3B/2db7aa3c-4969-40c0-b8c6-1ff5c953ba23.json deleted file mode 100644 index cdc181d82e85b0160ee3b0d748adc68e0239b388..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp-3B/2db7aa3c-4969-40c0-b8c6-1ff5c953ba23.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp-3B/1762652580.19236", - "retrieved_timestamp": "1762652580.1923609", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/FalconSlerp-3B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/FalconSlerp-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5694568190179834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46239111387485293 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3989270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29679188829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.228 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp1-7B/5d01fa6d-4280-4926-b166-e98892ee60f4.json 
b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp1-7B/5d01fa6d-4280-4926-b166-e98892ee60f4.json deleted file mode 100644 index 29d7c94608e762eacab8d110d759ec024100b891..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp1-7B/5d01fa6d-4280-4926-b166-e98892ee60f4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp1-7B/1762652580.1925812", - "retrieved_timestamp": "1762652580.192582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/FalconSlerp1-7B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/FalconSlerp1-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5394564200765082 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5354677787663963 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23791540785498488 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44525 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4128989361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp2-7B/fc8605ad-f7b9-4a73-afd3-85b996fc2549.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp2-7B/fc8605ad-f7b9-4a73-afd3-85b996fc2549.json deleted file mode 100644 index 41eff7414f97c87bb751bd1f96893694e97d658f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp2-7B/fc8605ad-f7b9-4a73-afd3-85b996fc2549.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp2-7B/1762652580.1928341", - "retrieved_timestamp": "1762652580.192835", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/FalconSlerp2-7B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/FalconSlerp2-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6160432097944565 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5537805428914538 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2983383685800604 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44788541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4140625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp3-10B/f933fbc2-370e-4231-94a9-c833c2aa793d.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp3-10B/f933fbc2-370e-4231-94a9-c833c2aa793d.json deleted file mode 100644 index beaf60bd2b586991086d222baddad8598b5840a2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp3-10B/f933fbc2-370e-4231-94a9-c833c2aa793d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp3-10B/1762652580.1930392", - "retrieved_timestamp": "1762652580.19304", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/FalconSlerp3-10B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/FalconSlerp3-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6001564737119731 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6060288025434474 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22734138972809667 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4030833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4323470744680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp3-7B/017a681e-1bbb-4890-bfcc-f276954678e1.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp3-7B/017a681e-1bbb-4890-bfcc-f276954678e1.json deleted file mode 100644 index 7f506426dce3944c317177b6d775b14fa2571528..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp3-7B/017a681e-1bbb-4890-bfcc-f276954678e1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp3-7B/1762652580.193249", - "retrieved_timestamp": "1762652580.19325", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/FalconSlerp3-7B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/FalconSlerp3-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6096235765546527 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5532966528909408 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45067708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41273271276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp4-7B/d6ac7c9f-212e-4000-b89e-d977122d2e2b.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp4-7B/d6ac7c9f-212e-4000-b89e-d977122d2e2b.json deleted file mode 100644 index aeab40c12ea7f3083c3d3016a242a2dd26fbc96d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp4-7B/d6ac7c9f-212e-4000-b89e-d977122d2e2b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp4-7B/1762652580.193457", - "retrieved_timestamp": "1762652580.1934578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/FalconSlerp4-7B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/FalconSlerp4-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6284580468711907 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5523506352993854 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4585208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4031748670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp6-7B/88a4587f-d3d4-4b08-b800-13a2daf4a660.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp6-7B/88a4587f-d3d4-4b08-b800-13a2daf4a660.json deleted file mode 100644 index ae93e5f22086277e344417f591956ff6b8776c9e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp6-7B/88a4587f-d3d4-4b08-b800-13a2daf4a660.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp6-7B/1762652580.193665", - "retrieved_timestamp": "1762652580.193666", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/FalconSlerp6-7B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/FalconSlerp6-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6026542906155667 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383801786207648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20468277945619334 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44921875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39951795212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_RombosBeagle-v2beta-MGS-32B/c507c0ac-759a-4013-8dd0-7ab5a959ca65.json 
b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_RombosBeagle-v2beta-MGS-32B/c507c0ac-759a-4013-8dd0-7ab5a959ca65.json deleted file mode 100644 index 8c92345067d4c51b90d3effc34f7d2707df07069..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_RombosBeagle-v2beta-MGS-32B/c507c0ac-759a-4013-8dd0-7ab5a959ca65.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_RombosBeagle-v2beta-MGS-32B/1762652580.199307", - "retrieved_timestamp": "1762652580.199308", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/RombosBeagle-v2beta-MGS-32B", - "developer": "hotmailuser", - "inference_platform": "unknown", - "id": "hotmailuser/RombosBeagle-v2beta-MGS-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5156761836371937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7037350002757341 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49924471299093653 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5020833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5907579787234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-7030/5fb3b31d-8c2c-4d76-8532-1bff0f793f4b.json b/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-7030/5fb3b31d-8c2c-4d76-8532-1bff0f793f4b.json deleted file mode 100644 index 247302ea67ca857b30c25ffa6c09fbe0fb8bc504..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-7030/5fb3b31d-8c2c-4d76-8532-1bff0f793f4b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/huihui-ai_QwQ-32B-Coder-Fusion-7030/1762652580.2006452", - "retrieved_timestamp": 
"1762652580.200646", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "huihui-ai/QwQ-32B-Coder-Fusion-7030", - "developer": "huihui-ai", - "inference_platform": "unknown", - "id": "huihui-ai/QwQ-32B-Coder-Fusion-7030" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38650779930584184 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6177864730931621 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2794561933534743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39222916666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4367519946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-8020/461ee093-b573-4ce9-9168-c9852dc9745b.json b/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-8020/461ee093-b573-4ce9-9168-c9852dc9745b.json deleted file mode 100644 index 7849a5bed152924dfb177a963267a435054bcb83..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-8020/461ee093-b573-4ce9-9168-c9852dc9745b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/huihui-ai_QwQ-32B-Coder-Fusion-8020/1762652580.200916", - "retrieved_timestamp": "1762652580.200917", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "huihui-ai/QwQ-32B-Coder-Fusion-8020", - "developer": "huihui-ai", - "inference_platform": "unknown", - "id": "huihui-ai/QwQ-32B-Coder-Fusion-8020" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6020547702318737 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6664531829718748 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42934374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367353723404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-9010/41d5fb44-855b-4ff1-8f5d-95b8a9f9a9af.json b/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-9010/41d5fb44-855b-4ff1-8f5d-95b8a9f9a9af.json deleted file mode 100644 index 4e2cd9c3642dcb2905d336e63bd5bb0c2d7802af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-9010/41d5fb44-855b-4ff1-8f5d-95b8a9f9a9af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/huihui-ai_QwQ-32B-Coder-Fusion-9010/1762652580.201131", - "retrieved_timestamp": "1762652580.201132", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "huihui-ai/QwQ-32B-Coder-Fusion-9010", - "developer": "huihui-ai", - "inference_platform": "unknown", - "id": "huihui-ai/QwQ-32B-Coder-Fusion-9010" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5778246164620984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6727405551499568 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615771812080537 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4681979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5600066489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-14B-Instruct-abliterated-v2/92cad41b-64b5-48db-b865-77d0ea2ef834.json b/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-14B-Instruct-abliterated-v2/92cad41b-64b5-48db-b865-77d0ea2ef834.json deleted file mode 100644 index a4199f2decdbd41f91f10969efa338d1942c98fa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-14B-Instruct-abliterated-v2/92cad41b-64b5-48db-b865-77d0ea2ef834.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-14B-Instruct-abliterated-v2/1762652580.201351", - "retrieved_timestamp": "1762652580.201352", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2", - "developer": "huihui-ai", - "inference_platform": "unknown", - "id": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8327637335602867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6323822447052897 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302114803625377 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42196875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49617686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-72B-Instruct-abliterated/b892c2f3-4aa6-4b19-80e5-1b0f5e0eda25.json b/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-72B-Instruct-abliterated/b892c2f3-4aa6-4b19-80e5-1b0f5e0eda25.json deleted file mode 100644 index c092ca338c8d0352ef7548e985fb9086006036e3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-72B-Instruct-abliterated/b892c2f3-4aa6-4b19-80e5-1b0f5e0eda25.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-72B-Instruct-abliterated/1762652580.2015731", - "retrieved_timestamp": "1762652580.2015731", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "huihui-ai/Qwen2.5-72B-Instruct-abliterated", - "developer": "huihui-ai", - "inference_platform": "unknown", - "id": "huihui-ai/Qwen2.5-72B-Instruct-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8592667455684251 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7189881596250237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4232708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.5536901595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-7B-Instruct-abliterated-v2/15c4b42b-ee8f-4f0d-8d54-7d827133fe7f.json b/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-7B-Instruct-abliterated-v2/15c4b42b-ee8f-4f0d-8d54-7d827133fe7f.json deleted file mode 100644 index 1a268fe24f67ab658f52ecb2d52aabd624e711ad..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-7B-Instruct-abliterated-v2/15c4b42b-ee8f-4f0d-8d54-7d827133fe7f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-7B-Instruct-abliterated-v2/1762652580.201998", - "retrieved_timestamp": "1762652580.201998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2", - "developer": "huihui-ai", - "inference_platform": "unknown", - "id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7606484128778308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5376688442794247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3980625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42079454787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-7B-Instruct-abliterated/625501d4-5d1e-48e0-8690-e301c51f652d.json b/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-7B-Instruct-abliterated/625501d4-5d1e-48e0-8690-e301c51f652d.json deleted file mode 100644 index 
4d27c50fea579158be012af15615a04a224ce059..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-7B-Instruct-abliterated/625501d4-5d1e-48e0-8690-e301c51f652d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-7B-Instruct-abliterated/1762652580.201783", - "retrieved_timestamp": "1762652580.2017841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "huihui-ai/Qwen2.5-7B-Instruct-abliterated", - "developer": "huihui-ai", - "inference_platform": "unknown", - "id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7546033413564897 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5261589972829911 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45770392749244715 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39666666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41796875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/huu-ontocord/huu-ontocord_wide_3b_orpo_stage1.1-ss1-orpo3/50854a36-b87e-421d-b8d5-7a46054ecc59.json b/leaderboard_data/HFOpenLLMv2/huu-ontocord/huu-ontocord_wide_3b_orpo_stage1.1-ss1-orpo3/50854a36-b87e-421d-b8d5-7a46054ecc59.json deleted file mode 100644 index b87f1aa3e036c05066a6e5d6db030d2d33f4bf6d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/huu-ontocord/huu-ontocord_wide_3b_orpo_stage1.1-ss1-orpo3/50854a36-b87e-421d-b8d5-7a46054ecc59.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/huu-ontocord_wide_3b_orpo_stage1.1-ss1-orpo3/1762652580.202209", - "retrieved_timestamp": "1762652580.20221", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3", - "developer": "huu-ontocord", - "inference_platform": "unknown", - "id": "huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15052726764983576 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936618285636837 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36178125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11643949468085106 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/iRyanBell/iRyanBell_ARC1-II/19afc23f-5849-4147-b240-9bb7ddea4d58.json b/leaderboard_data/HFOpenLLMv2/iRyanBell/iRyanBell_ARC1-II/19afc23f-5849-4147-b240-9bb7ddea4d58.json deleted file mode 100644 index a19cbb2608ca9c2867549f13529eacaf074cef20..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/iRyanBell/iRyanBell_ARC1-II/19afc23f-5849-4147-b240-9bb7ddea4d58.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/iRyanBell_ARC1-II/1762652580.204559", - "retrieved_timestamp": "1762652580.204561", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "iRyanBell/ARC1-II", - "developer": "iRyanBell", - "inference_platform": "unknown", - "id": "iRyanBell/ARC1-II" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17083560508340093 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33817781029884353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4912916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1685505319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/iRyanBell/iRyanBell_ARC1/62f9b47d-2860-44b3-8abb-3d441f4bdeb4.json b/leaderboard_data/HFOpenLLMv2/iRyanBell/iRyanBell_ARC1/62f9b47d-2860-44b3-8abb-3d441f4bdeb4.json deleted file mode 100644 index 891c3fc0ada17a30cc8da595299b7c30fa939a59..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/iRyanBell/iRyanBell_ARC1/62f9b47d-2860-44b3-8abb-3d441f4bdeb4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/iRyanBell_ARC1/1762652580.204204", - "retrieved_timestamp": "1762652580.204204", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "iRyanBell/ARC1", - "developer": "iRyanBell", - "inference_platform": "unknown", - "id": "iRyanBell/ARC1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.441112913735555 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4902999658144703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3990520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3371010638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibivibiv/ibivibiv_colossus_120b/f0bcf710-b1a8-4736-9fd3-6b0ea241155e.json b/leaderboard_data/HFOpenLLMv2/ibivibiv/ibivibiv_colossus_120b/f0bcf710-b1a8-4736-9fd3-6b0ea241155e.json deleted file mode 100644 index 3e8c4d80bdce9b6650b38ea62afa6ff2a92abbef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibivibiv/ibivibiv_colossus_120b/f0bcf710-b1a8-4736-9fd3-6b0ea241155e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibivibiv_colossus_120b/1762652580.2048829", - "retrieved_timestamp": "1762652580.204884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibivibiv/colossus_120b", - "developer": "ibivibiv", - "inference_platform": "unknown", - "id": "ibivibiv/colossus_120b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42759877126025614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6061408586494191 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4733125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3961103723404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 117.749 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibivibiv/ibivibiv_multimaster-7b-v6/7044a4d4-1c07-40ef-917c-d242b61d7877.json b/leaderboard_data/HFOpenLLMv2/ibivibiv/ibivibiv_multimaster-7b-v6/7044a4d4-1c07-40ef-917c-d242b61d7877.json deleted file mode 100644 index 3db4609239c15b784758708f18244e1624c57211..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibivibiv/ibivibiv_multimaster-7b-v6/7044a4d4-1c07-40ef-917c-d242b61d7877.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibivibiv_multimaster-7b-v6/1762652580.205187", - "retrieved_timestamp": "1762652580.205188", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibivibiv/multimaster-7b-v6", - "developer": "ibivibiv", - "inference_platform": "unknown", - "id": "ibivibiv/multimaster-7b-v6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4473075883101283 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519351871026721 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43957291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30950797872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 35.428 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-1b-a400m-base/52e253ba-0291-4e78-b292-806cabe74697.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-1b-a400m-base/52e253ba-0291-4e78-b292-806cabe74697.json deleted file mode 100644 index 
89c7caafdd636f2cff1ae829ac4500900ea1fc0a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-1b-a400m-base/52e253ba-0291-4e78-b292-806cabe74697.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-1b-a400m-base/1762652580.205958", - "retrieved_timestamp": "1762652580.20596", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.0-1b-a400m-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-1b-a400m-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24040324117785256 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221205531032148 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3367291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11519281914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 1.335 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-1b-a400m-instruct/afc49838-c7fc-40ed-841f-74b0bc3dd36e.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-1b-a400m-instruct/afc49838-c7fc-40ed-841f-74b0bc3dd36e.json deleted file mode 100644 index c956d5079e9084909295cb6e65031f88704d28c1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-1b-a400m-instruct/afc49838-c7fc-40ed-841f-74b0bc3dd36e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-1b-a400m-instruct/1762652580.206321", - "retrieved_timestamp": "1762652580.206322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.0-1b-a400m-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-1b-a400m-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33315159332792543 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3223950988485842 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36228124999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12441821808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 1.335 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-2b-base/184f8ef6-7cb7-45f2-b983-70dc4503a968.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-2b-base/184f8ef6-7cb7-45f2-b983-70dc4503a968.json deleted file mode 100644 index e299c3e5a57c6c76c58102b78e6ccbccbf3520e4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-2b-base/184f8ef6-7cb7-45f2-b983-70dc4503a968.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-2b-base/1762652580.206552", - "retrieved_timestamp": "1762652580.206552", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.0-2b-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-2b-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873821460391761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40474805593806223 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28020134228187926 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3434270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23811502659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 2.634 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-2b-instruct/ec853cc1-7c48-4334-9ff6-d9669750570b.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-2b-instruct/ec853cc1-7c48-4334-9ff6-d9669750570b.json deleted file mode 100644 index f557c6b8a8f315e40d551a5619d2a5fce2f695c9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-2b-instruct/ec853cc1-7c48-4334-9ff6-d9669750570b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-2b-instruct/1762652580.206777", - "retrieved_timestamp": "1762652580.206777", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.0-2b-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-2b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.513977357854936 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44119772062630297 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35148958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2814162234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 2.634 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-3b-a800m-base/f917bdff-4be5-440b-8e62-bb9f7b0dd0f5.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-3b-a800m-base/f917bdff-4be5-440b-8e62-bb9f7b0dd0f5.json deleted file mode 100644 index 7b7e69638718aed3058b404af48b2acf57cb6b91..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-3b-a800m-base/f917bdff-4be5-440b-8e62-bb9f7b0dd0f5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-3b-a800m-base/1762652580.20698", - "retrieved_timestamp": "1762652580.20698", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.0-3b-a800m-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-3b-a800m-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2732261510569733 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36674974971308566 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34196875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18907912234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 3.374 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-3b-a800m-instruct/7c92caf5-df83-4c8e-ab85-f99c7ac43f63.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-3b-a800m-instruct/7c92caf5-df83-4c8e-ab85-f99c7ac43f63.json deleted file mode 100644 index baf2237013cf87d7a08bf71d2dd0b5912a8660eb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-3b-a800m-instruct/7c92caf5-df83-4c8e-ab85-f99c7ac43f63.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-3b-a800m-instruct/1762652580.2071838", - "retrieved_timestamp": "1762652580.2071848", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.0-3b-a800m-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-3b-a800m-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4298217618142085 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37527805291733446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21517619680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 
3.374 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-8b-base/b7b71327-323b-4b7c-92a1-426911bed479.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-8b-base/b7b71327-323b-4b7c-92a1-426911bed479.json deleted file mode 100644 index a7bfa635de7cc08af302f2d322edfb6dff96cae1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-8b-base/b7b71327-323b-4b7c-92a1-426911bed479.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-8b-base/1762652580.207386", - "retrieved_timestamp": "1762652580.207386", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.0-8b-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-8b-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4583482936386566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4943760637365333 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40813541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3312832446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 8.171 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-8b-instruct/d4dc4d78-33a3-428c-9490-382dd0c19c08.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-8b-instruct/d4dc4d78-33a3-428c-9490-382dd0c19c08.json deleted file mode 100644 index bb0842ef7c7449595f9dc3650d713c940f8ab0d5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-8b-instruct/d4dc4d78-33a3-428c-9490-382dd0c19c08.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-8b-instruct/1762652580.207594", - "retrieved_timestamp": "1762652580.207595", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.0-8b-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.0-8b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5309633993359841 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5191874631840226 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1419939577039275 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3900625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34566156914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 8.171 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-1b-a400m-base/17192714-a653-428d-a7c7-06dd41db77fa.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-1b-a400m-base/17192714-a653-428d-a7c7-06dd41db77fa.json deleted file mode 100644 index cebdea0f81c55beb811966a56c30d88d4f08eda2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-1b-a400m-base/17192714-a653-428d-a7c7-06dd41db77fa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-1b-a400m-base/1762652580.207968", - "retrieved_timestamp": "1762652580.2079701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"ibm-granite/granite-3.1-1b-a400m-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.1-1b-a400m-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2519437315212525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3298699546506724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3500625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11394614361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteMoeForCausalLM", - "params_billions": 1.335 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-1b-a400m-instruct/8167695b-db96-4687-91b8-0af55e67a606.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-1b-a400m-instruct/8167695b-db96-4687-91b8-0af55e67a606.json deleted file mode 100644 index 77a79c6d61f5b531206bbaacf5b0ac72115ad9c1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-1b-a400m-instruct/8167695b-db96-4687-91b8-0af55e67a606.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-1b-a400m-instruct/1762652580.208256", - "retrieved_timestamp": "1762652580.208257", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.1-1b-a400m-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.1-1b-a400m-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46863987553025976 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3279834385375178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33025 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12167553191489362 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GraniteMoeForCausalLM", - "params_billions": 1.335 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-2b-base/971e6eba-61ff-42e6-9740-1895080ff94f.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-2b-base/971e6eba-61ff-42e6-9740-1895080ff94f.json deleted file mode 100644 index 4fc5f0a9a62e503732f991a3cbf8657b748362a5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-2b-base/971e6eba-61ff-42e6-9740-1895080ff94f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-2b-base/1762652580.208491", - "retrieved_timestamp": "1762652580.208492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.1-2b-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.1-2b-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35216115462528313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4047188028918873 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3485729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22506648936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 2.534 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-2b-instruct/fcdf14a1-900f-4856-aac6-8ed47910f882.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-2b-instruct/fcdf14a1-900f-4856-aac6-8ed47910f882.json deleted file mode 100644 index 9f4cd39fb01ddee0bac74a36f66e551176b2dbaa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-2b-instruct/fcdf14a1-900f-4856-aac6-8ed47910f882.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-2b-instruct/1762652580.2087219", - "retrieved_timestamp": "1762652580.2087228", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.1-2b-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.1-2b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.628557782240012 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44089858558056544 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15256797583081572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3605416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28191489361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GraniteForCausalLM", - "params_billions": 2.534 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-3b-a800m-base/8930e3f9-e0b8-4fb7-91e2-ee34b17cf1eb.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-3b-a800m-base/8930e3f9-e0b8-4fb7-91e2-ee34b17cf1eb.json deleted file mode 100644 index 692dc704fa5fa2c915b0c5cd47f90084e6766420..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-3b-a800m-base/8930e3f9-e0b8-4fb7-91e2-ee34b17cf1eb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-3b-a800m-base/1762652580.20895", - "retrieved_timestamp": "1762652580.208951", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.1-3b-a800m-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.1-3b-a800m-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2996294276962903 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.362822992347764 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3275208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1792719414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteMoeForCausalLM", - "params_billions": 3.299 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-3b-a800m-instruct/1e0c27fc-8111-4325-8e61-c24c2f8124f7.json 
b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-3b-a800m-instruct/1e0c27fc-8111-4325-8e61-c24c2f8124f7.json deleted file mode 100644 index a8036aa01d51ff61de1feea65cca9c0f8a3885e7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-3b-a800m-instruct/1e0c27fc-8111-4325-8e61-c24c2f8124f7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-3b-a800m-instruct/1762652580.2092001", - "retrieved_timestamp": "1762652580.2092009", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.1-3b-a800m-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.1-3b-a800m-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5516462984880118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4009494521947192 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21476063829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GraniteMoeForCausalLM", - "params_billions": 3.299 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-8b-base/10cbee10-0344-4da0-a26a-4298fd8f4d11.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-8b-base/10cbee10-0344-4da0-a26a-4298fd8f4d11.json deleted file mode 100644 index 9b5e19d43d84ccd9d7faf98b4331a41b72121812..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-8b-base/10cbee10-0344-4da0-a26a-4298fd8f4d11.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-8b-base/1762652580.209538", - "retrieved_timestamp": 
"1762652580.2095392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.1-8b-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.1-8b-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4221033524381973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4776956677111636 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3922291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3232214095744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GraniteForCausalLM", - "params_billions": 8.171 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-8b-instruct/6d6b2e81-8b90-4703-aafb-40de92b3ede3.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-8b-instruct/6d6b2e81-8b90-4703-aafb-40de92b3ede3.json deleted file mode 100644 index 8134ed056ab6c82d208763af520ec8f0cd2c99f4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-8b-instruct/6d6b2e81-8b90-4703-aafb-40de92b3ede3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-8b-instruct/1762652580.2098079", - "retrieved_timestamp": "1762652580.2098088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.1-8b-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.1-8b-instruct" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7207564816908026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364460433816018 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21978851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47070833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3537234042553192 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GraniteForCausalLM", - "params_billions": 8.171 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.2-2b-instruct/39fd9dc4-88e4-4b52-8527-c1ea692d8ca1.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.2-2b-instruct/39fd9dc4-88e4-4b52-8527-c1ea692d8ca1.json deleted file mode 100644 index 5bd18834a88824a3a5643c22f6dc4f425e6beb8c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.2-2b-instruct/39fd9dc4-88e4-4b52-8527-c1ea692d8ca1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.2-2b-instruct/1762652580.2100549", - "retrieved_timestamp": "1762652580.2100558", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.2-2b-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.2-2b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6151688630611223 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43872707491212865 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14425981873111782 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3645729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2783410904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 2.534 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.2-8b-instruct/982accb5-ea5c-45bc-8cdd-08edf5e543a1.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.2-8b-instruct/982accb5-ea5c-45bc-8cdd-08edf5e543a1.json deleted file mode 100644 index 20fcb7de4e48dc2923d76892ef439ad9a892b1a9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.2-8b-instruct/982accb5-ea5c-45bc-8cdd-08edf5e543a1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.2-8b-instruct/1762652580.210291", - "retrieved_timestamp": "1762652580.2102919", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-3.2-8b-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-3.2-8b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7274509412802475 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5401759656246116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23791540785498488 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4561979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35123005319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 8.171 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-7b-base/2d21a773-8f72-4b7d-ba94-80867127c54a.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-7b-base/2d21a773-8f72-4b7d-ba94-80867127c54a.json deleted file mode 100644 index 5887bd6b9692542994d9e26bae71013227bbfcd8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-7b-base/2d21a773-8f72-4b7d-ba94-80867127c54a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-7b-base/1762652580.2106082", - "retrieved_timestamp": "1762652580.210609", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-7b-base", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-7b-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24142719096441884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34804372716106186 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35548958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18342752659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 6.738 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-7b-instruct/509f5b3a-6110-4757-a313-80181ecd3228.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-7b-instruct/509f5b3a-6110-4757-a313-80181ecd3228.json deleted file mode 100644 index 193bbcb40195e57e7ffd9236e2e1aa449e5b8f2f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-7b-instruct/509f5b3a-6110-4757-a313-80181ecd3228.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm-granite_granite-7b-instruct/1762652580.2108219", - "retrieved_timestamp": "1762652580.2108219", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm-granite/granite-7b-instruct", - "developer": "ibm-granite", - "inference_platform": "unknown", - "id": "ibm-granite/granite-7b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2972313461615181 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37229529603269523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40199999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2286402925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm/ibm_PowerLM-3b/f1eb3ba0-225e-49d5-9509-422702927c9f.json b/leaderboard_data/HFOpenLLMv2/ibm/ibm_PowerLM-3b/f1eb3ba0-225e-49d5-9509-422702927c9f.json deleted file mode 100644 index 932ff336f5a83840474a30e8bd0b1196a6342051..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm/ibm_PowerLM-3b/f1eb3ba0-225e-49d5-9509-422702927c9f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/ibm_PowerLM-3b/1762652580.205445", - "retrieved_timestamp": "1762652580.205446", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm/PowerLM-3b", - "developer": "ibm", - "inference_platform": "unknown", - "id": "ibm/PowerLM-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33212764354135915 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3679456724439114 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3562916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20162898936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GraniteForCausalLM", - "params_billions": 3.512 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ibm/ibm_merlinite-7b/7fdbc273-200d-4085-8a03-8f56cde4f2fc.json b/leaderboard_data/HFOpenLLMv2/ibm/ibm_merlinite-7b/7fdbc273-200d-4085-8a03-8f56cde4f2fc.json deleted file mode 100644 index da47a34a19bf1a366d8de173c1cd32e50aa7be5a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ibm/ibm_merlinite-7b/7fdbc273-200d-4085-8a03-8f56cde4f2fc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ibm_merlinite-7b/1762652580.2057128", - "retrieved_timestamp": "1762652580.205714", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ibm/merlinite-7b", - "developer": "ibm", - "inference_platform": "unknown", - "id": "ibm/merlinite-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2498703440205322 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50071326118705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44115624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3068484042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.15-02.10-RP/20c0d1f9-24b8-4993-82f1-d9889c18c56a.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.15-02.10-RP/20c0d1f9-24b8-4993-82f1-d9889c18c56a.json deleted file mode 100644 index 58da457d8bbd5547c977804fb457a07af779ad98..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.15-02.10-RP/20c0d1f9-24b8-4993-82f1-d9889c18c56a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.15-02.10-RP/1762652580.211034", - "retrieved_timestamp": "1762652580.211034", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.15-02.10-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.15-02.10-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5343355629729118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4976384736188401 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43197916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30659906914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.16-02.10-RP/824cb85d-e7a0-421a-994b-c0b178ab8e56.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.16-02.10-RP/824cb85d-e7a0-421a-994b-c0b178ab8e56.json deleted file mode 100644 index f41be5756ce9e40fa3209ec8a39ba774355aa1ec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.16-02.10-RP/824cb85d-e7a0-421a-994b-c0b178ab8e56.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.16-02.10-RP/1762652580.211284", - "retrieved_timestamp": "1762652580.211284", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.16-02.10-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.16-02.10-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5069083365470286 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4945564313654156 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433375 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3067652925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.17-03.10-RP/2faf039c-9c8e-46db-8472-6b741c451bf1.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.17-03.10-RP/2faf039c-9c8e-46db-8472-6b741c451bf1.json deleted file mode 100644 index 5fcb994744ed5924c36c66097c67cbc0b12be181..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.17-03.10-RP/2faf039c-9c8e-46db-8472-6b741c451bf1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.17-03.10-RP/1762652580.211494", - "retrieved_timestamp": "1762652580.211495", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.17-03.10-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.17-03.10-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5123538876846767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5006815748225494 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30851063829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.27-06.11-RP/314c9c7e-0c13-4f6b-be25-d2a2cbc25e9b.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.27-06.11-RP/314c9c7e-0c13-4f6b-be25-d2a2cbc25e9b.json deleted 
file mode 100644 index 7512defe87e51190f0141f947ef9c3b35232acac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.27-06.11-RP/314c9c7e-0c13-4f6b-be25-d2a2cbc25e9b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.27-06.11-RP/1762652580.211702", - "retrieved_timestamp": "1762652580.211702", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.27-06.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.27-06.11-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49182059158588104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5111654648230625 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43278125000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3154089095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.29-06.11-RP/b07e3d05-409f-498a-a324-82c4a592d4dc.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.29-06.11-RP/b07e3d05-409f-498a-a324-82c4a592d4dc.json deleted file mode 100644 index 3443ccf9b466c7123f17217219541e5f018d1751..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.29-06.11-RP/b07e3d05-409f-498a-a324-82c4a592d4dc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.29-06.11-RP/1762652580.2119", - "retrieved_timestamp": "1762652580.211901", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.29-06.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.29-06.11-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.486050346414181 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5087880173407883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4458958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30925864361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.31-08.11-RP/1fc072c6-ad31-4151-8420-7402b565510d.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.31-08.11-RP/1fc072c6-ad31-4151-8420-7402b565510d.json deleted file mode 100644 index 57cc3da2cc65180152a258af064ad9310efef9c0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.31-08.11-RP/1fc072c6-ad31-4151-8420-7402b565510d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.31-08.11-RP/1762652580.212094", - "retrieved_timestamp": "1762652580.212095", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.31-08.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.31-08.11-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5145768782386291 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5032134100285419 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42766666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3130817819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.32-10.11-RP/68e99fe4-634e-4462-b1db-d2d40814ff0b.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.32-10.11-RP/68e99fe4-634e-4462-b1db-d2d40814ff0b.json deleted file mode 100644 index 4c606bc49384b1fa979dd5cbfe4c39c1a36871be..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.32-10.11-RP/68e99fe4-634e-4462-b1db-d2d40814ff0b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.32-10.11-RP/1762652580.2122939", - "retrieved_timestamp": "1762652580.2122948", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.32-10.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.32-10.11-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49154576523623983 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5047695597611622 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4382083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3100066489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.34b-14.11-RP/ed2a47c3-06c7-451b-94cd-8cd42be2ca9c.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.34b-14.11-RP/ed2a47c3-06c7-451b-94cd-8cd42be2ca9c.json deleted file mode 100644 index bbde5edbc5f3fe61c48ab24775569c44a5dce0c1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.34b-14.11-RP/ed2a47c3-06c7-451b-94cd-8cd42be2ca9c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.34b-14.11-RP/1762652580.2124958", - "retrieved_timestamp": "1762652580.212497", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.34b-14.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.34b-14.11-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47620868185303883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5067195329696937 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4419895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3125 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.34n-14.11-RP/8c6aae5b-6a9b-47fb-908b-6b51159cc9b2.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.34n-14.11-RP/8c6aae5b-6a9b-47fb-908b-6b51159cc9b2.json deleted file mode 100644 index 8b2f923f2f7b1198a7c3d68a2b4b6ff0ff9a8695..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.34n-14.11-RP/8c6aae5b-6a9b-47fb-908b-6b51159cc9b2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.34n-14.11-RP/1762652580.2127092", - "retrieved_timestamp": "1762652580.21271", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.34n-14.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.34n-14.11-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47865663107222167 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5091090160356474 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4379583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31241688829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.37-18.11-RP/774c0461-5e81-436a-9347-7a4cc15ca019.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.37-18.11-RP/774c0461-5e81-436a-9347-7a4cc15ca019.json deleted file mode 100644 index e6fe45dfbc35845ada946c8bfe2136f326ca52d5..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.37-18.11-RP/774c0461-5e81-436a-9347-7a4cc15ca019.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.37-18.11-RP/1762652580.212915", - "retrieved_timestamp": "1762652580.212916", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.37-18.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.37-18.11-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4972162750391184 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084310833712639 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43392708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3143284574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.38-19.11-RP/4d13aaf7-a18d-4bad-ab22-8e08c3f2e16a.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.38-19.11-RP/4d13aaf7-a18d-4bad-ab22-8e08c3f2e16a.json deleted file mode 100644 index 22807c6c24ca60b872adaa50405535d6218c49ef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.38-19.11-RP/4d13aaf7-a18d-4bad-ab22-8e08c3f2e16a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.38-19.11-RP/1762652580.213116", - "retrieved_timestamp": "1762652580.213117", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "icefog72/Ice0.38-19.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.38-19.11-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44033830237104216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.510108216407024 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43671875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31399601063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.39-19.11-RP/780c711f-774b-499e-881e-25dba76273a1.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.39-19.11-RP/780c711f-774b-499e-881e-25dba76273a1.json deleted file mode 100644 index db9ea9a53e812542842cf9863c9c7a58dde1ba3b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.39-19.11-RP/780c711f-774b-499e-881e-25dba76273a1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.39-19.11-RP/1762652580.2133162", - "retrieved_timestamp": "1762652580.2133162", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.39-19.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.39-19.11-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47565902915375646 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5092985137525424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4341458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3126662234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.40-20.11-RP/5220bee5-74d3-4730-9fee-4ca488e1a37e.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.40-20.11-RP/5220bee5-74d3-4730-9fee-4ca488e1a37e.json deleted file mode 100644 index 50573a0e7cbd07bad4bc159363f1f9665f38b305..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.40-20.11-RP/5220bee5-74d3-4730-9fee-4ca488e1a37e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.40-20.11-RP/1762652580.2136111", - "retrieved_timestamp": "1762652580.213614", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.40-20.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.40-20.11-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4762585495374495 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.509308586549064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - 
} - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44459374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30992353723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.41-22.11-RP/43a30cf0-ccb5-46ce-b520-55ee110002c9.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.41-22.11-RP/43a30cf0-ccb5-46ce-b520-55ee110002c9.json deleted file mode 100644 index 529cd444b6814466e05a8314ab0b357005aa170b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.41-22.11-RP/43a30cf0-ccb5-46ce-b520-55ee110002c9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.41-22.11-RP/1762652580.213999", - "retrieved_timestamp": "1762652580.2140002", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.41-22.11-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.41-22.11-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620451513096362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4723318624775949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45597916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26180186170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.50-16.01-RP/37602e25-bd23-462a-8566-38f3b0fee63d.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.50-16.01-RP/37602e25-bd23-462a-8566-38f3b0fee63d.json deleted file mode 100644 index e7980420a32942a8eabe686eaa5c59f010098d80..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.50-16.01-RP/37602e25-bd23-462a-8566-38f3b0fee63d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.50-16.01-RP/1762652580.214273", - "retrieved_timestamp": "1762652580.214274", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.50-16.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.50-16.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43848987353555235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49804682910006176 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30693151595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.50.1-16.01-RP/fde6323e-0bfe-4ec9-aa86-4371bbd1645a.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.50.1-16.01-RP/fde6323e-0bfe-4ec9-aa86-4371bbd1645a.json deleted file mode 100644 index 9e2a1d58ba32deec7916f2843c77cc966a27ceea..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.50.1-16.01-RP/fde6323e-0bfe-4ec9-aa86-4371bbd1645a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/icefog72_Ice0.50.1-16.01-RP/1762652580.214615", - "retrieved_timestamp": "1762652580.214617", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.50.1-16.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.50.1-16.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4829031414424837 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5107472937598788 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43274999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3132480053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.51-16.01-RP/7a137ac4-8445-4c1a-9203-abc5f4131213.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.51-16.01-RP/7a137ac4-8445-4c1a-9203-abc5f4131213.json deleted file mode 100644 index 3e4e549361b128bf0db829059719bf237ac7b289..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.51-16.01-RP/7a137ac4-8445-4c1a-9203-abc5f4131213.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.51-16.01-RP/1762652580.214901", - "retrieved_timestamp": "1762652580.214902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.51-16.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.51-16.01-RP" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4430610779398662 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5044464794803141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44366666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30601728723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.51.1-16.01-RP/859a9706-f73b-4426-9c5a-052625d62f5b.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.51.1-16.01-RP/859a9706-f73b-4426-9c5a-052625d62f5b.json deleted file mode 100644 index f9bab569027631c4ab03bb5310a8429565ece998..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.51.1-16.01-RP/859a9706-f73b-4426-9c5a-052625d62f5b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.51.1-16.01-RP/1762652580.215148", - "retrieved_timestamp": "1762652580.2151492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.51.1-16.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.51.1-16.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4573243438520902 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5121083021452105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43938541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104222074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.52-16.01-RP/72412b78-cc3e-4652-9034-32c72aee5796.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.52-16.01-RP/72412b78-cc3e-4652-9034-32c72aee5796.json deleted file mode 100644 index 6a1fed871fc9ccfb16d0aa117ad59712c61af6b5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.52-16.01-RP/72412b78-cc3e-4652-9034-32c72aee5796.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.52-16.01-RP/1762652580.21541", - "retrieved_timestamp": "1762652580.215412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.52-16.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.52-16.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4503051902285935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504677500406742 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43960416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3080119680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.52.1-16.01-RP/6bfbd9d6-b376-4169-8e6a-2c3210040e97.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.52.1-16.01-RP/6bfbd9d6-b376-4169-8e6a-2c3210040e97.json deleted file mode 100644 index fdaa843bda7fed8ff52fc4461df66a6938033616..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.52.1-16.01-RP/6bfbd9d6-b376-4169-8e6a-2c3210040e97.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.52.1-16.01-RP/1762652580.21567", - "retrieved_timestamp": "1762652580.215671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.52.1-16.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.52.1-16.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45492626231731803 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.510648341878344 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43938541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31050531914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.53-16.01-RP/6415adfc-35a9-480c-a740-dac02725c8f0.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.53-16.01-RP/6415adfc-35a9-480c-a740-dac02725c8f0.json deleted file mode 100644 index 01c39db1bc229a1dba19d9ade7e4ad0198a84cc6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.53-16.01-RP/6415adfc-35a9-480c-a740-dac02725c8f0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.53-16.01-RP/1762652580.215963", - "retrieved_timestamp": "1762652580.2159638", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.53-16.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.53-16.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4741352943523185 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5101675133484068 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43274999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31299867021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.54-17.01-RP/94d01e56-d7d5-4680-b577-ebcc0198ca0c.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.54-17.01-RP/94d01e56-d7d5-4680-b577-ebcc0198ca0c.json deleted file mode 100644 index adbd2bb7cce5883e3cab8f108acae8d3fedc2656..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.54-17.01-RP/94d01e56-d7d5-4680-b577-ebcc0198ca0c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.54-17.01-RP/1762652580.2162719", - "retrieved_timestamp": 
"1762652580.2162728", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.54-17.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.54-17.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4378903531518593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4853448809638454 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48741666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23262965425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.55-17.01-RP/a2de66f0-bbd1-40b9-95d3-74e0335b853b.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.55-17.01-RP/a2de66f0-bbd1-40b9-95d3-74e0335b853b.json deleted file mode 100644 index b74fe190beb28043e4cf0d1f01f57521977205ba..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.55-17.01-RP/a2de66f0-bbd1-40b9-95d3-74e0335b853b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.55-17.01-RP/1762652580.2165911", - "retrieved_timestamp": "1762652580.2165918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.55-17.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.55-17.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.496067101956143 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5076567509425027 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4725 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2657912234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.57-17.01-RP/8d99bf0e-7db0-46f5-96a0-7f977b8cf5f2.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.57-17.01-RP/8d99bf0e-7db0-46f5-96a0-7f977b8cf5f2.json deleted file mode 100644 index 309d5e462f1476da55ae187dff07a1dd15683ad7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.57-17.01-RP/8d99bf0e-7db0-46f5-96a0-7f977b8cf5f2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.57-17.01-RP/1762652580.216822", - "retrieved_timestamp": "1762652580.216822", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.57-17.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.57-17.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5151763986223221 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5064080420224116 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46859375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26512632978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.60-18.01-RP/b5c42995-f1fe-4a7e-90c1-d8fb00cba116.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.60-18.01-RP/b5c42995-f1fe-4a7e-90c1-d8fb00cba116.json deleted file mode 100644 index 08d1470285f794fcff0981f4f5c29fb83c966b55..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.60-18.01-RP/b5c42995-f1fe-4a7e-90c1-d8fb00cba116.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.60-18.01-RP/1762652580.217043", - "retrieved_timestamp": "1762652580.2170439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.60-18.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.60-18.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5374329002601985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5093724614980669 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46704166666666663 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28366023936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.60.1-18.01-RP/8a14ed64-1408-469e-ab8d-05c897904d20.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.60.1-18.01-RP/8a14ed64-1408-469e-ab8d-05c897904d20.json deleted file mode 100644 index 8e0e391d20a405cd8ce806397d541bab38d3c05a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.60.1-18.01-RP/8a14ed64-1408-469e-ab8d-05c897904d20.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.60.1-18.01-RP/1762652580.217258", - "retrieved_timestamp": "1762652580.217259", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.60.1-18.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.60.1-18.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5187735209244804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5119675522804026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4497708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2913896276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.61-18.01-RP/1c166a10-c176-42c7-9421-750e170f5706.json 
b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.61-18.01-RP/1c166a10-c176-42c7-9421-750e170f5706.json deleted file mode 100644 index e4e55fdfe45d02696d1353805053c0a2edb53195..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.61-18.01-RP/1c166a10-c176-42c7-9421-750e170f5706.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.61-18.01-RP/1762652580.2174668", - "retrieved_timestamp": "1762652580.2174678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.61-18.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.61-18.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5441273598496433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5104839613346842 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4697395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27086103723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.62-18.01-RP/0c5bb530-f59b-4097-8a79-9e4f524385a2.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.62-18.01-RP/0c5bb530-f59b-4097-8a79-9e4f524385a2.json deleted file mode 100644 index c786d5edbc37de9565220044945136f9b707709d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.62-18.01-RP/0c5bb530-f59b-4097-8a79-9e4f524385a2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.62-18.01-RP/1762652580.21767", - "retrieved_timestamp": "1762652580.217671", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.62-18.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.62-18.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.536733644507684 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5103327208197285 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4537708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28773271276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.62.1-24.01-RP/26ba869e-ae3b-44ef-a215-f94e4e4cb1fc.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.62.1-24.01-RP/26ba869e-ae3b-44ef-a215-f94e4e4cb1fc.json deleted file mode 100644 index 60bbbb401da67c5e7946600d47646b02577286c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.62.1-24.01-RP/26ba869e-ae3b-44ef-a215-f94e4e4cb1fc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.62.1-24.01-RP/1762652580.2178729", - "retrieved_timestamp": "1762652580.2178729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.62.1-24.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.62.1-24.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5181740005407873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5108967760246949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45510416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28706781914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.64-24.01-RP/d7313786-f553-454e-b2c8-62a0162c9339.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.64-24.01-RP/d7313786-f553-454e-b2c8-62a0162c9339.json deleted file mode 100644 index 2cb41ea4fa5eca54878e8af78d9d2c52255614e3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.64-24.01-RP/d7313786-f553-454e-b2c8-62a0162c9339.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.64-24.01-RP/1762652580.218076", - "retrieved_timestamp": "1762652580.218076", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.64-24.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.64-24.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5440774921652327 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5059610114856247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29330119680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.64.1-24.01-RP/359daeb1-3546-473f-801b-c9942fd010aa.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.64.1-24.01-RP/359daeb1-3546-473f-801b-c9942fd010aa.json deleted file mode 100644 index 519815cebdb7717911e4a7a3c0bac1352cfc6dc7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.64.1-24.01-RP/359daeb1-3546-473f-801b-c9942fd010aa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.64.1-24.01-RP/1762652580.218272", - "retrieved_timestamp": "1762652580.218272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.64.1-24.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.64.1-24.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5446770125489258 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5059610114856247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620208333333333 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29330119680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.65-25.01-RP/fa5d2148-c45b-4266-a6a0-11b471273f75.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.65-25.01-RP/fa5d2148-c45b-4266-a6a0-11b471273f75.json deleted file mode 100644 index 141d3c066bf44b2128b1512a28441d5d5783fbba..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.65-25.01-RP/fa5d2148-c45b-4266-a6a0-11b471273f75.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.65-25.01-RP/1762652580.2184708", - "retrieved_timestamp": "1762652580.218472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.65-25.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.65-25.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5029366525264077 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5095976254774931 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4339583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29970079787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.66-25.01-RP/b619dad2-fcb2-45ab-b603-ae1da3916eb7.json 
b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.66-25.01-RP/b619dad2-fcb2-45ab-b603-ae1da3916eb7.json deleted file mode 100644 index d861016485d0db71dce04d0b8d64fbb09cdfab96..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.66-25.01-RP/b619dad2-fcb2-45ab-b603-ae1da3916eb7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.66-25.01-RP/1762652580.2186701", - "retrieved_timestamp": "1762652580.2186701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.66-25.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.66-25.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.532487134137422 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5128983540188711 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44344791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3039394946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.67-25.01-RP/cf0a4a2d-a104-43cf-ac01-66250e880ff0.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.67-25.01-RP/cf0a4a2d-a104-43cf-ac01-66250e880ff0.json deleted file mode 100644 index a22ee2bcd08cae5689fcb4c6ffa88af685282fe0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.67-25.01-RP/cf0a4a2d-a104-43cf-ac01-66250e880ff0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.67-25.01-RP/1762652580.21887", - "retrieved_timestamp": "1762652580.218871", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.67-25.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.67-25.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.536134124123991 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5112894150790012 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42788541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30967420212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.68-25.01-RP/dd7cb16f-0752-4639-aa99-90b9be448295.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.68-25.01-RP/dd7cb16f-0752-4639-aa99-90b9be448295.json deleted file mode 100644 index cc1cba248d6e48dfa0dac3734a6b5a869fee12a3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.68-25.01-RP/dd7cb16f-0752-4639-aa99-90b9be448295.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.68-25.01-RP/1762652580.2190669", - "retrieved_timestamp": "1762652580.2190678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.68-25.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.68-25.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5513714721383707 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5130058094823416 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44456249999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011968085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.69-25.01-RP/643da0d0-176a-40dd-b096-5aac8de827e9.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.69-25.01-RP/643da0d0-176a-40dd-b096-5aac8de827e9.json deleted file mode 100644 index a8a3db1388fe4718397666c78dbde3804e383ec1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.69-25.01-RP/643da0d0-176a-40dd-b096-5aac8de827e9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.69-25.01-RP/1762652580.219263", - "retrieved_timestamp": "1762652580.219264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.69-25.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.69-25.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5437527981311808 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097683665599672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4485625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29654255319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.7-29.09-RP/9c6cf7a1-1a17-4070-9ce3-633461334f42.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.7-29.09-RP/9c6cf7a1-1a17-4070-9ce3-633461334f42.json deleted file mode 100644 index f38ebd7de40bad7bb78f214fcbef2225ffc3f40d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.7-29.09-RP/9c6cf7a1-1a17-4070-9ce3-633461334f42.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.7-29.09-RP/1762652580.2194638", - "retrieved_timestamp": "1762652580.219465", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.7-29.09-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.7-29.09-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5175744801570943 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5047661992357916 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4237916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3126662234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.70-25.01-RP/e109acd0-c7e3-4a9f-8e06-c428b95acc83.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.70-25.01-RP/e109acd0-c7e3-4a9f-8e06-c428b95acc83.json deleted file mode 100644 index 19251c603825da95f28a58edbc573412b907a8dd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.70-25.01-RP/e109acd0-c7e3-4a9f-8e06-c428b95acc83.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.70-25.01-RP/1762652580.2196732", - "retrieved_timestamp": "1762652580.219674", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.70-25.01-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.70-25.01-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.549797869652522 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.513632436415875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45119791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2996176861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.70.1-01.02-RP/ee088f70-5734-4951-8bc0-e0579a053fd2.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.70.1-01.02-RP/ee088f70-5734-4951-8bc0-e0579a053fd2.json deleted file mode 100644 index 
a2c0cc19811348ddf34abeb915d48b2b9bac131d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.70.1-01.02-RP/ee088f70-5734-4951-8bc0-e0579a053fd2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.70.1-01.02-RP/1762652580.219877", - "retrieved_timestamp": "1762652580.219877", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.70.1-01.02-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.70.1-01.02-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5069582042314393 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5059798926804829 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4599166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2748503989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.73-01.02-RP/ba7bf09f-b7a1-4fd4-b262-4929a81da34a.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.73-01.02-RP/ba7bf09f-b7a1-4fd4-b262-4929a81da34a.json deleted file mode 100644 index 479feec8059a8fee338766d2a7ad89a458e54020..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.73-01.02-RP/ba7bf09f-b7a1-4fd4-b262-4929a81da34a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.73-01.02-RP/1762652580.220075", - "retrieved_timestamp": "1762652580.220076", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.73-01.02-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.73-01.02-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.529164838184905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5103425890792322 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46639583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27019614361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.74-02.02-RP/7470c7d4-80fe-4e88-a695-c628f9ed3682.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.74-02.02-RP/7470c7d4-80fe-4e88-a695-c628f9ed3682.json deleted file mode 100644 index c335ab5ec4b36472fb0c3990e81adcdda8cdef3a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.74-02.02-RP/7470c7d4-80fe-4e88-a695-c628f9ed3682.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.74-02.02-RP/1762652580.220269", - "retrieved_timestamp": "1762652580.2202702", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.74-02.02-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.74-02.02-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2935344884905384 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4646134965075064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42804166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21434507978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.76-02.02-RP/701743bb-1ddf-4810-824a-38959d4a0e02.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.76-02.02-RP/701743bb-1ddf-4810-824a-38959d4a0e02.json deleted file mode 100644 index 1328b8b45a6327fa39565b6eb6f01f6d0a161a6f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.76-02.02-RP/701743bb-1ddf-4810-824a-38959d4a0e02.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.76-02.02-RP/1762652580.220735", - "retrieved_timestamp": "1762652580.220737", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.76-02.02-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.76-02.02-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45290274250100837 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5085610407875073 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43616666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2652094414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.77-02.02-RP/0eebefc6-138f-4af5-a8b6-a35c798a38cb.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.77-02.02-RP/0eebefc6-138f-4af5-a8b6-a35c798a38cb.json deleted file mode 100644 index 336e66630a526bc4b8244f5c9f96238745adaa74..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.77-02.02-RP/0eebefc6-138f-4af5-a8b6-a35c798a38cb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.77-02.02-RP/1762652580.221007", - "retrieved_timestamp": "1762652580.2210078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.77-02.02-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.77-02.02-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5309633993359841 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5109257300160749 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4765 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.29986702127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.78-02.02-RP/ec943fa1-b138-46e8-b1ae-c9a476c73ed1.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.78-02.02-RP/ec943fa1-b138-46e8-b1ae-c9a476c73ed1.json deleted file mode 100644 index 284dbd106f3c0ce3272eeaa9e12fceeae82aee85..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.78-02.02-RP/ec943fa1-b138-46e8-b1ae-c9a476c73ed1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.78-02.02-RP/1762652580.221266", - "retrieved_timestamp": "1762652580.221267", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.78-02.02-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.78-02.02-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.405292401937969 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5002126961381052 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.468625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2954621010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.80-03.02-RP/847b4e14-a07c-45ed-b2eb-ecea0f80147b.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.80-03.02-RP/847b4e14-a07c-45ed-b2eb-ecea0f80147b.json deleted file mode 100644 index 093263aafef5d5db109d7c7e6ef0335a133668a6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.80-03.02-RP/847b4e14-a07c-45ed-b2eb-ecea0f80147b.json 
+++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_Ice0.80-03.02-RP/1762652580.2214909", - "retrieved_timestamp": "1762652580.221492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/Ice0.80-03.02-RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/Ice0.80-03.02-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5516462984880118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097962218679292 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4923125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2912234042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceCocoaRP-7b/5427828d-b53d-4e44-82ed-df6a9c0f9a47.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceCocoaRP-7b/5427828d-b53d-4e44-82ed-df6a9c0f9a47.json deleted file mode 100644 index 251f6ece17acf5695d459dbea3aed2a4d15e4fe5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceCocoaRP-7b/5427828d-b53d-4e44-82ed-df6a9c0f9a47.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceCocoaRP-7b/1762652580.2217228", - "retrieved_timestamp": "1762652580.2217238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceCocoaRP-7b", - "developer": "icefog72", - "inference_platform": 
"unknown", - "id": "icefog72/IceCocoaRP-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962421929369628 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4937902147076245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3098404255319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceCoffeeRP-7b/bf5e2b11-79ce-49ed-947b-fb34110a3802.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceCoffeeRP-7b/bf5e2b11-79ce-49ed-947b-fb34110a3802.json deleted file mode 100644 index 57290145a04875d318b660719a65b1dc58e0803b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceCoffeeRP-7b/bf5e2b11-79ce-49ed-947b-fb34110a3802.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceCoffeeRP-7b/1762652580.2220101", - "retrieved_timestamp": "1762652580.2220109", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceCoffeeRP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceCoffeeRP-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4959174989029109 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48887216244327214 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4159791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2974567819148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkByFrankensteinV3RP/39325b65-ad12-44ef-a1bf-ffe9e870ced8.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkByFrankensteinV3RP/39325b65-ad12-44ef-a1bf-ffe9e870ced8.json deleted file mode 100644 index ce1ec6f1cfe4db07e62020a5b34af7abf5b2ed69..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkByFrankensteinV3RP/39325b65-ad12-44ef-a1bf-ffe9e870ced8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceDrinkByFrankensteinV3RP/1762652580.222236", - "retrieved_timestamp": "1762652580.222236", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceDrinkByFrankensteinV3RP", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceDrinkByFrankensteinV3RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4974911013887596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4832523723413275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4253125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.292719414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkNameGoesHereRP-7b-Model_Stock/b0aaf6e9-ffe3-4de9-b3f5-c33d52b59ed2.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkNameGoesHereRP-7b-Model_Stock/b0aaf6e9-ffe3-4de9-b3f5-c33d52b59ed2.json deleted file mode 100644 index 79ae461d0c2550663995921dd01bef9c7c0a9d6b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkNameGoesHereRP-7b-Model_Stock/b0aaf6e9-ffe3-4de9-b3f5-c33d52b59ed2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceDrinkNameGoesHereRP-7b-Model_Stock/1762652580.2224698", - "retrieved_timestamp": "1762652580.2224698", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49684171332065585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46578646938927254 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4067395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2816655585106383 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkNameNotFoundRP-7b-Model_Stock/f0e6fa5e-20c2-407d-8301-70d86cb1a51f.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkNameNotFoundRP-7b-Model_Stock/f0e6fa5e-20c2-407d-8301-70d86cb1a51f.json deleted file mode 100644 index 94fa3521b766edcbf854b602676242cc006b00f2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkNameNotFoundRP-7b-Model_Stock/f0e6fa5e-20c2-407d-8301-70d86cb1a51f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceDrinkNameNotFoundRP-7b-Model_Stock/1762652580.2227032", - "retrieved_timestamp": "1762652580.2227042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5130032757527804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.502625425089929 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3064328457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrunkCherryRP-7b/c0e3f4ee-52dc-45c3-844a-8cc4e4520f24.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrunkCherryRP-7b/c0e3f4ee-52dc-45c3-844a-8cc4e4520f24.json deleted file mode 100644 index 920bb78492463fcba3cb0c6cfeabba3cc2784ee9..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrunkCherryRP-7b/c0e3f4ee-52dc-45c3-844a-8cc4e4520f24.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceDrunkCherryRP-7b/1762652580.222923", - "retrieved_timestamp": "1762652580.222924", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceDrunkCherryRP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceDrunkCherryRP-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48982255969715904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4846629039263151 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4291875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3009474734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrunkenCherryRP-7b/9d1e6b55-aa7c-4fea-8a77-92795c0ee60a.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrunkenCherryRP-7b/9d1e6b55-aa7c-4fea-8a77-92795c0ee60a.json deleted file mode 100644 index 9ac74b51a02e8b753f8fd0bc04226a218f1770e4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrunkenCherryRP-7b/9d1e6b55-aa7c-4fea-8a77-92795c0ee60a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceDrunkenCherryRP-7b/1762652580.223197", - "retrieved_timestamp": "1762652580.223207", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceDrunkenCherryRP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceDrunkenCherryRP-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4762585495374495 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.509308586549064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44459374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30992353723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceEspressoRPv2-7b/ade14c35-442b-4a0a-8345-99b7b58dc194.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceEspressoRPv2-7b/ade14c35-442b-4a0a-8345-99b7b58dc194.json deleted file mode 100644 index 5deea65a5902e67812fc7378f0177dcee92827b7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceEspressoRPv2-7b/ade14c35-442b-4a0a-8345-99b7b58dc194.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceEspressoRPv2-7b/1762652580.223459", - "retrieved_timestamp": "1762652580.2234602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceEspressoRPv2-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceEspressoRPv2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4977160600539901 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5054890156350785 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43306249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3061003989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceLemonTeaRP-32k-7b/fd90b65b-7b6f-4ca2-93e3-59486c0ee070.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceLemonTeaRP-32k-7b/fd90b65b-7b6f-4ca2-93e3-59486c0ee070.json deleted file mode 100644 index cfa6c21f9fdcd91d819fbd5b8f3c453b7a8c44c1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceLemonTeaRP-32k-7b/fd90b65b-7b6f-4ca2-93e3-59486c0ee070.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceLemonTeaRP-32k-7b/1762652580.2236779", - "retrieved_timestamp": "1762652580.223679", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceLemonTeaRP-32k-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceLemonTeaRP-32k-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5212214701436633 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49973852418379305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42903125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3067652925531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceMartiniRP-7b/210bea5c-35de-4bd6-93db-871704add0d6.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceMartiniRP-7b/210bea5c-35de-4bd6-93db-871704add0d6.json deleted file mode 100644 index 78ea253608bde70f24d8ba9f639d59c4e4ce7844..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceMartiniRP-7b/210bea5c-35de-4bd6-93db-871704add0d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceMartiniRP-7b/1762652580.223922", - "retrieved_timestamp": "1762652580.223923", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceMartiniRP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceMartiniRP-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5044603873278457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4972421837639585 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4344895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3073470744680851 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceNalyvkaRP-7b/95dd235d-6930-48fd-8594-5acb0110be29.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceNalyvkaRP-7b/95dd235d-6930-48fd-8594-5acb0110be29.json deleted file mode 100644 index 41d711c9258ee0f69c9db6a080b84abf971f0ced..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceNalyvkaRP-7b/95dd235d-6930-48fd-8594-5acb0110be29.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceNalyvkaRP-7b/1762652580.224114", - "retrieved_timestamp": "1762652580.224115", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceNalyvkaRP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceNalyvkaRP-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.549797869652522 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.513632436415875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45119791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2996176861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeRP-7b/67e351c8-6cca-4982-86e9-e774786c6862.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeRP-7b/67e351c8-6cca-4982-86e9-e774786c6862.json deleted file mode 100644 index fc300ec4c9ce4a6dae857334ea688ac1c466a2cc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeRP-7b/67e351c8-6cca-4982-86e9-e774786c6862.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceSakeRP-7b/1762652580.2243059", - "retrieved_timestamp": "1762652580.224307", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceSakeRP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceSakeRP-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5227950726295119 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5119287057484642 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41300000000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3176529255319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV4RP-7b/93b5850f-74d0-45cd-977e-5bf6e4dc5d8d.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV4RP-7b/93b5850f-74d0-45cd-977e-5bf6e4dc5d8d.json deleted file mode 100644 index 9d46f9a0d3f8faca2ad111be00d2c7c6dd9d3e49..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV4RP-7b/93b5850f-74d0-45cd-977e-5bf6e4dc5d8d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceSakeV4RP-7b/1762652580.224551", - "retrieved_timestamp": "1762652580.224552", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceSakeV4RP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceSakeV4RP-7b" 
- }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4634192830578421 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4929557826908731 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40819791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31025598404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV6RP-7b/e9ebbcbf-81d5-494b-95a1-4e79feb42c40.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV6RP-7b/e9ebbcbf-81d5-494b-95a1-4e79feb42c40.json deleted file mode 100644 index 97d193f322f3bb83cdfc8643d20d94817fa26dd0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV6RP-7b/e9ebbcbf-81d5-494b-95a1-4e79feb42c40.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceSakeV6RP-7b/1762652580.224776", - "retrieved_timestamp": "1762652580.224777", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceSakeV6RP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceSakeV6RP-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5032613465604596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49760336362566354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42001041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3093417553191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV8RP-7b/dbeb9a8a-53c5-472b-a4b1-1aa0582f8486.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV8RP-7b/dbeb9a8a-53c5-472b-a4b1-1aa0582f8486.json deleted file mode 100644 index af0564a96ecf0acdc41a4511ad4b2a7f7e5e4bfd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV8RP-7b/dbeb9a8a-53c5-472b-a4b1-1aa0582f8486.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceSakeV8RP-7b/1762652580.2249868", - "retrieved_timestamp": "1762652580.224988", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceSakeV8RP-7b", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceSakeV8RP-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6085741388404988 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48847141337960176 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3992708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.301030585106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceTea21EnergyDrinkRPV13-DPOv3.5/f4d3a112-d529-48f8-a99e-85e9eb02e0c1.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceTea21EnergyDrinkRPV13-DPOv3.5/f4d3a112-d529-48f8-a99e-85e9eb02e0c1.json deleted file mode 100644 index 45a9602adfa20283638f37eee9221e7abb0835d2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceTea21EnergyDrinkRPV13-DPOv3.5/f4d3a112-d529-48f8-a99e-85e9eb02e0c1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceTea21EnergyDrinkRPV13-DPOv3.5/1762652580.2254012", - "retrieved_timestamp": "1762652580.225402", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48709978412833504 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4399660013109026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39641666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24983377659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff 
--git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceTea21EnergyDrinkRPV13-DPOv3/4b4a9630-c942-445e-b396-4a988d489aa7.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceTea21EnergyDrinkRPV13-DPOv3/4b4a9630-c942-445e-b396-4a988d489aa7.json deleted file mode 100644 index 87712986f2c7be750f3ae5a1e9fbdd5e194f42c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceTea21EnergyDrinkRPV13-DPOv3/4b4a9630-c942-445e-b396-4a988d489aa7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/icefog72_IceTea21EnergyDrinkRPV13-DPOv3/1762652580.225198", - "retrieved_timestamp": "1762652580.2251992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3", - "developer": "icefog72", - "inference_platform": "unknown", - "id": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5263423272472595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5019587584232624 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30560172872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ilsp/ilsp_Llama-Krikri-8B-Instruct/592bd629-d0bf-48b0-83c6-abfa3731fd14.json b/leaderboard_data/HFOpenLLMv2/ilsp/ilsp_Llama-Krikri-8B-Instruct/592bd629-d0bf-48b0-83c6-abfa3731fd14.json deleted file mode 100644 index f8bc45a983d921eae60795add7579009b9218c0c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ilsp/ilsp_Llama-Krikri-8B-Instruct/592bd629-d0bf-48b0-83c6-abfa3731fd14.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/ilsp_Llama-Krikri-8B-Instruct/1762652580.225861", - "retrieved_timestamp": "1762652580.225861", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ilsp/Llama-Krikri-8B-Instruct", - "developer": "ilsp", - "inference_platform": "unknown", - "id": "ilsp/Llama-Krikri-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6078748830879843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504664191645287 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4079791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3312832446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.202 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/inflatebot/inflatebot_MN-12B-Mag-Mell-R1/43f7613d-bd9f-480d-a2ed-dcabf3169944.json b/leaderboard_data/HFOpenLLMv2/inflatebot/inflatebot_MN-12B-Mag-Mell-R1/43f7613d-bd9f-480d-a2ed-dcabf3169944.json deleted file mode 100644 index c8d7e5e82caed43ba8ec0ef90b5d41581f52d5b5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/inflatebot/inflatebot_MN-12B-Mag-Mell-R1/43f7613d-bd9f-480d-a2ed-dcabf3169944.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/inflatebot_MN-12B-Mag-Mell-R1/1762652580.2261078", - "retrieved_timestamp": "1762652580.226109", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "inflatebot/MN-12B-Mag-Mell-R1", - "developer": "inflatebot", - "inference_platform": "unknown", - "id": 
"inflatebot/MN-12B-Mag-Mell-R1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46129602787271107 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5303854975434981 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40022916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34383311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/informatiker/informatiker_Qwen2-7B-Instruct-abliterated/be1ab009-3aa6-43da-8b8e-11e5287a0370.json b/leaderboard_data/HFOpenLLMv2/informatiker/informatiker_Qwen2-7B-Instruct-abliterated/be1ab009-3aa6-43da-8b8e-11e5287a0370.json deleted file mode 100644 index 5cec04200724cb36e2854d02b545327629b2a670..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/informatiker/informatiker_Qwen2-7B-Instruct-abliterated/be1ab009-3aa6-43da-8b8e-11e5287a0370.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/informatiker_Qwen2-7B-Instruct-abliterated/1762652580.2263439", - "retrieved_timestamp": "1762652580.226345", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "informatiker/Qwen2-7B-Instruct-abliterated", - "developer": "informatiker", - "inference_platform": "unknown", - "id": "informatiker/Qwen2-7B-Instruct-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5821708622011817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5534265515936739 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.263595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38879166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873005319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/insightfactory/insightfactory_Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/3986b43c-2752-4a8f-b1e1-c3657734f84b.json b/leaderboard_data/HFOpenLLMv2/insightfactory/insightfactory_Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/3986b43c-2752-4a8f-b1e1-c3657734f84b.json deleted file mode 100644 index 86498f734c4be21af61375d64d3b05fc33454c5b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/insightfactory/insightfactory_Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/3986b43c-2752-4a8f-b1e1-c3657734f84b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/insightfactory_Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/1762652580.226581", - "retrieved_timestamp": "1762652580.226582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model", - "developer": "insightfactory", - "inference_platform": "unknown", - "id": "insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45884807865352817 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4146016381618061 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.349875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2960438829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "", - "params_billions": 1.933 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/instruction-pretrain/instruction-pretrain_InstructLM-500M/38ba0438-f5ed-434e-af2e-fed71988f7b9.json b/leaderboard_data/HFOpenLLMv2/instruction-pretrain/instruction-pretrain_InstructLM-500M/38ba0438-f5ed-434e-af2e-fed71988f7b9.json deleted file mode 100644 index 082ae4f7d2fab32c145ace72fe0d43072cfd3a62..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/instruction-pretrain/instruction-pretrain_InstructLM-500M/38ba0438-f5ed-434e-af2e-fed71988f7b9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/instruction-pretrain_InstructLM-500M/1762652580.226826", - "retrieved_timestamp": "1762652580.226826", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "instruction-pretrain/InstructLM-500M", - "developer": "instruction-pretrain", - "inference_platform": "unknown", - "id": "instruction-pretrain/InstructLM-500M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027662158627996 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29408717872529677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1141123670212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 0.5 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-1_8b/fc23ef4f-2ef1-4a3e-b029-9d646145e135.json b/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-1_8b/fc23ef4f-2ef1-4a3e-b029-9d646145e135.json deleted file mode 100644 index ed4541f3ec566a5d7d06295f04f9d169f4102254..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-1_8b/fc23ef4f-2ef1-4a3e-b029-9d646145e135.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/internlm_internlm2-1_8b/1762652580.227062", - "retrieved_timestamp": "1762652580.227063", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "internlm/internlm2-1_8b", - "developer": "internlm", - "inference_platform": "unknown", - "id": "internlm/internlm2-1_8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2197702097102355 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3879732800028095 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38128125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15882646276595744 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-7b/d4bba57d-2a3c-4945-ae47-7830840d0259.json b/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-7b/d4bba57d-2a3c-4945-ae47-7830840d0259.json deleted file mode 100644 index 
7ff338290abbcae3c707ad20932b38f4588a8852..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-7b/d4bba57d-2a3c-4945-ae47-7830840d0259.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/internlm_internlm2-7b/1762652580.2273018", - "retrieved_timestamp": "1762652580.227303", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "internlm/internlm2-7b", - "developer": "internlm", - "inference_platform": "unknown", - "id": "internlm/internlm2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22803680981595092 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5825 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08571428571428572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33666666666666667 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43999999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Unknown", - "params_billions": 0.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-chat-1_8b/767b5c7e-6319-487f-906c-2abca794f884.json b/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-chat-1_8b/767b5c7e-6319-487f-906c-2abca794f884.json deleted file mode 100644 index e0c3df475761d0158147aa2ac38c98eb77d6f50e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-chat-1_8b/767b5c7e-6319-487f-906c-2abca794f884.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/internlm_internlm2-chat-1_8b/1762652580.227562", - "retrieved_timestamp": "1762652580.227563", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "internlm/internlm2-chat-1_8b", - "developer": "internlm", - "inference_platform": "unknown", - "id": "internlm/internlm2-chat-1_8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2386545477111841 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4452271664119214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36305208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18392619680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 1.889 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-1_8b-chat/d37e87e2-53c3-42fa-b78d-04d2819b14d3.json b/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-1_8b-chat/d37e87e2-53c3-42fa-b78d-04d2819b14d3.json deleted file mode 100644 index 2c03343fc1acb0f556e0f4a3553c0caaf53c9c0c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-1_8b-chat/d37e87e2-53c3-42fa-b78d-04d2819b14d3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/internlm_internlm2_5-1_8b-chat/1762652580.227762", - "retrieved_timestamp": "1762652580.227763", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "internlm/internlm2_5-1_8b-chat", - "developer": "internlm", - "inference_platform": "unknown", - "id": "internlm/internlm2_5-1_8b-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38490870889240547 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4488926786996439 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15861027190332327 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35939583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12990359042553193 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 1.89 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-20b-chat/a651c814-41e2-4951-bb8f-df799cc6e470.json b/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-20b-chat/a651c814-41e2-4951-bb8f-df799cc6e470.json deleted file mode 100644 index e2b476f9e8f47ccbc81f3261ea8ac48ce2210bc8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-20b-chat/a651c814-41e2-4951-bb8f-df799cc6e470.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/internlm_internlm2_5-20b-chat/1762652580.2279649", - "retrieved_timestamp": "1762652580.227966", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "internlm/internlm2_5-20b-chat", - "developer": "internlm", - "inference_platform": "unknown", - "id": "internlm/internlm2_5-20b-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7009977969565198 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7473580533672884 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4558229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39976728723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 19.86 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-7b-chat/28245528-26e8-48a8-9cc8-68d7a6389bde.json b/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-7b-chat/28245528-26e8-48a8-9cc8-68d7a6389bde.json deleted file mode 100644 index 202718af088336a72421d6220831d3c47cc74eb0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-7b-chat/28245528-26e8-48a8-9cc8-68d7a6389bde.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/internlm_internlm2_5-7b-chat/1762652580.2281651", - "retrieved_timestamp": "1762652580.2281659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "internlm/internlm2_5-7b-chat", - "developer": "internlm", - "inference_platform": "unknown", - "id": "internlm/internlm2_5-7b-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5538692890419642 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7073179916851792 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25302114803625375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45938541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3776595744680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "InternLM2ForCausalLM", - "params_billions": 7.738 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/intervitens/intervitens_mini-magnum-12b-v1.1/8ad974e6-8d4c-45bf-86d0-f701cfc323d5.json b/leaderboard_data/HFOpenLLMv2/intervitens/intervitens_mini-magnum-12b-v1.1/8ad974e6-8d4c-45bf-86d0-f701cfc323d5.json deleted file mode 100644 index b5b47b8079b0ae5dbb16a486ad09454bf07a0856..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/intervitens/intervitens_mini-magnum-12b-v1.1/8ad974e6-8d4c-45bf-86d0-f701cfc323d5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/intervitens_mini-magnum-12b-v1.1/1762652580.228364", - "retrieved_timestamp": "1762652580.228365", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "intervitens/mini-magnum-12b-v1.1", - "developer": "intervitens", - "inference_platform": "unknown", - "id": "intervitens/mini-magnum-12b-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5155509603407846 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.506180035650624 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4004479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3291223404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/inumulaisk/inumulaisk_eval_model/e3e4a9b3-ce68-4999-966e-2ef2baf99266.json b/leaderboard_data/HFOpenLLMv2/inumulaisk/inumulaisk_eval_model/e3e4a9b3-ce68-4999-966e-2ef2baf99266.json deleted file mode 100644 index 
2a1e6577b46dcbe382962ced0853cf443ea4287a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/inumulaisk/inumulaisk_eval_model/e3e4a9b3-ce68-4999-966e-2ef2baf99266.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/inumulaisk_eval_model/1762652580.228598", - "retrieved_timestamp": "1762652580.228599", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "inumulaisk/eval_model", - "developer": "inumulaisk", - "inference_platform": "unknown", - "id": "inumulaisk/eval_model" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19314197440568803 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35118774303346373 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.297583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16638962765957446 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/invalid-coder/invalid-coder_Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/cdb8a900-75f3-4e6b-9d35-5a6791e8acd1.json b/leaderboard_data/HFOpenLLMv2/invalid-coder/invalid-coder_Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/cdb8a900-75f3-4e6b-9d35-5a6791e8acd1.json deleted file mode 100644 index 91f345b2ae107f4631da1a158520f1a918390340..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/invalid-coder/invalid-coder_Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/cdb8a900-75f3-4e6b-9d35-5a6791e8acd1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/invalid-coder_Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/1762652580.229043", - "retrieved_timestamp": "1762652580.229047", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp", - "developer": "invalid-coder", - "inference_platform": "unknown", - "id": "invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45547591501660034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5158439010792586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3992395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145777925531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_EtherealRainbow-v0.2-8B/c60869f0-7009-48c9-be41-339335e5ee4e.json b/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_EtherealRainbow-v0.2-8B/c60869f0-7009-48c9-be41-339335e5ee4e.json deleted file mode 100644 index 3fdad3af234edded25afcc1ba2d6a0c9120ccc4b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_EtherealRainbow-v0.2-8B/c60869f0-7009-48c9-be41-339335e5ee4e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/invisietch_EtherealRainbow-v0.2-8B/1762652580.229454", - "retrieved_timestamp": "1762652580.229455", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "invisietch/EtherealRainbow-v0.2-8B", - "developer": "invisietch", - "inference_platform": "unknown", - "id": "invisietch/EtherealRainbow-v0.2-8B" 
- }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39032988027323057 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5102035205059678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0823262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38267708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36527593085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_EtherealRainbow-v0.3-8B/cc85ba7f-bbc0-43e7-a678-949fd5be8498.json b/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_EtherealRainbow-v0.3-8B/cc85ba7f-bbc0-43e7-a678-949fd5be8498.json deleted file mode 100644 index b22061cd3de5d71ebaabff32e075b10b9b66ab41..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_EtherealRainbow-v0.3-8B/cc85ba7f-bbc0-43e7-a678-949fd5be8498.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/invisietch_EtherealRainbow-v0.3-8B/1762652580.229776", - "retrieved_timestamp": "1762652580.2297769", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "invisietch/EtherealRainbow-v0.3-8B", - "developer": "invisietch", - "inference_platform": "unknown", - "id": "invisietch/EtherealRainbow-v0.3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36822298168858625 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5096758454539693 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39039583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36261635638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_MiS-Firefly-v0.2-22B/6df8e489-865f-4692-a673-6abbf2159d1d.json b/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_MiS-Firefly-v0.2-22B/6df8e489-865f-4692-a673-6abbf2159d1d.json deleted file mode 100644 index 4831400584a3713b8082fa34a7cd7cb0c0123936..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_MiS-Firefly-v0.2-22B/6df8e489-865f-4692-a673-6abbf2159d1d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/invisietch_MiS-Firefly-v0.2-22B/1762652580.2300959", - "retrieved_timestamp": "1762652580.2300968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "invisietch/MiS-Firefly-v0.2-22B", - "developer": "invisietch", - "inference_platform": "unknown", - "id": "invisietch/MiS-Firefly-v0.2-22B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5371082062261466 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5513523591170696 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16540785498489427 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46937500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3620345744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_Nimbus-Miqu-v0.1-70B/c36d07f4-b263-4849-86f9-d3fea355c068.json b/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_Nimbus-Miqu-v0.1-70B/c36d07f4-b263-4849-86f9-d3fea355c068.json deleted file mode 100644 index 1a01518cd49c8614fc003a7c7bf6ea431fed9955..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_Nimbus-Miqu-v0.1-70B/c36d07f4-b263-4849-86f9-d3fea355c068.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/invisietch_Nimbus-Miqu-v0.1-70B/1762652580.230321", - "retrieved_timestamp": "1762652580.230322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "invisietch/Nimbus-Miqu-v0.1-70B", - "developer": "invisietch", - "inference_platform": "unknown", - "id": "invisietch/Nimbus-Miqu-v0.1-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46466819150963884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.601030667794844 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41331249999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3853058510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 68.977 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaredjoss/jaredjoss_pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/cf6b0824-45c4-4b47-bf23-e5df5673b74e.json b/leaderboard_data/HFOpenLLMv2/jaredjoss/jaredjoss_pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/cf6b0824-45c4-4b47-bf23-e5df5673b74e.json deleted file mode 100644 index 974070cb2fd71fa22c58c8af72b61f4322c00372..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaredjoss/jaredjoss_pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/cf6b0824-45c4-4b47-bf23-e5df5673b74e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaredjoss_pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/1762652580.230787", - "retrieved_timestamp": "1762652580.230787", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model", - "developer": "jaredjoss", - "inference_platform": "unknown", - "id": "jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15722172723928066 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2863444769655102 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3606979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11685505319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 0.407 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2-8B/0064f2f6-672e-478c-9184-e7fd32ad06b8.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2-8B/0064f2f6-672e-478c-9184-e7fd32ad06b8.json deleted file mode 100644 index 
34edb73075b4345a097c0237a132778a40b7af46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2-8B/0064f2f6-672e-478c-9184-e7fd32ad06b8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2-8B/1762652580.231028", - "retrieved_timestamp": "1762652580.231029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Auro-Kosmos-EVAA-v2-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Auro-Kosmos-EVAA-v2-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4778077722664752 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5447163557182707 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38580452127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.1-8B/4381d7ab-d19f-4fa0-a69a-978af28df8fa.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.1-8B/4381d7ab-d19f-4fa0-a69a-978af28df8fa.json deleted file mode 100644 index 50437ac8da85ee911d6c7580debe27e75e9b5060..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.1-8B/4381d7ab-d19f-4fa0-a69a-978af28df8fa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2.1-8B/1762652580.231263", - "retrieved_timestamp": "1762652580.231264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Auro-Kosmos-EVAA-v2.1-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Auro-Kosmos-EVAA-v2.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4665919759571271 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5444200006474947 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4316979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.382563164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.2-8B/4e616fc6-8baa-4c9a-9098-b8d108911ad2.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.2-8B/4e616fc6-8baa-4c9a-9098-b8d108911ad2.json deleted file mode 100644 index b3536b2f4f13cbf91db25c1450089866797406fd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.2-8B/4e616fc6-8baa-4c9a-9098-b8d108911ad2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2.2-8B/1762652580.231466", - "retrieved_timestamp": "1762652580.231467", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Auro-Kosmos-EVAA-v2.2-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Auro-Kosmos-EVAA-v2.2-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4267997801389203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431077158331955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42506249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37982047872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.3-8B/9c7ee100-754e-4665-8527-452021a2243b.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.3-8B/9c7ee100-754e-4665-8527-452021a2243b.json deleted file mode 100644 index 353deb67195e93b40f39d3babe39ac7e86bce948..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.3-8B/9c7ee100-754e-4665-8527-452021a2243b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2.3-8B/1762652580.231667", - "retrieved_timestamp": "1762652580.231667", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Auro-Kosmos-EVAA-v2.3-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Auro-Kosmos-EVAA-v2.3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42712447417297217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5440818233123913 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4277916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37840757978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Aurora_faustus-8B/0563ee22-d981-45cb-83f8-7dbdb2734d10.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Aurora_faustus-8B/0563ee22-d981-45cb-83f8-7dbdb2734d10.json deleted file mode 100644 index e06375e9d465da6dc55e40779ad0a247f0bd417d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Aurora_faustus-8B/0563ee22-d981-45cb-83f8-7dbdb2734d10.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Aurora_faustus-8B/1762652580.231864", - "retrieved_timestamp": "1762652580.2318652", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-Aurora_faustus-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-Aurora_faustus-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.443236168920686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5260325661068855 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4116979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38131648936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-8B/746ffa2c-cc95-4d69-9e46-0e8f4febd440.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-8B/746ffa2c-cc95-4d69-9e46-0e8f4febd440.json deleted file mode 100644 index 451406eeafdc72529f83097af3cab18849b99787..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-8B/746ffa2c-cc95-4d69-9e46-0e8f4febd440.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-8B/1762652580.232065", - "retrieved_timestamp": "1762652580.232065", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4404635256674513 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5311831227740652 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4236666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3818151595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Franken-Immersive-v39-8B/f9e1901a-854d-4437-8d49-a6c47799f687.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Franken-Immersive-v39-8B/f9e1901a-854d-4437-8d49-a6c47799f687.json deleted file mode 100644 index edddc0881745e58ee246175ae398d7527a3ec849..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Franken-Immersive-v39-8B/f9e1901a-854d-4437-8d49-a6c47799f687.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Franken-Immersive-v39-8B/1762652580.232267", - "retrieved_timestamp": "1762652580.232268", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43779061778303796 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5189720817259138 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4236354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3900432180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Franken-v38-8B/8919b3ad-529c-4391-bec3-65b81dad97c3.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Franken-v38-8B/8919b3ad-529c-4391-bec3-65b81dad97c3.json deleted file mode 100644 index 6a72dd8ccf34305b9b0b4b9af9606d584bf23583..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Franken-v38-8B/8919b3ad-529c-4391-bec3-65b81dad97c3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Franken-v38-8B/1762652580.2324722", - "retrieved_timestamp": "1762652580.2324731", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-Franken-v38-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-Franken-v38-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4355676272290855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5229513322616746 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42115624999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3890458776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Fusion-8B/3030519e-f137-4091-9394-26a0779f0ad9.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Fusion-8B/3030519e-f137-4091-9394-26a0779f0ad9.json deleted file mode 100644 index cc6f930e4ee141e121840db1e0b82c5748939ca4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Fusion-8B/3030519e-f137-4091-9394-26a0779f0ad9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Fusion-8B/1762652580.2328691", - "retrieved_timestamp": "1762652580.2328691", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-Fusion-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-Fusion-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43446832183052075 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5419028777027763 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42766666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38538896276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Fusion-8B/ac41e588-0664-44f5-9fa9-eafd6508078b.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Fusion-8B/ac41e588-0664-44f5-9fa9-eafd6508078b.json deleted file mode 100644 index 50b56b25b8052a472ea28e927a94b95071876e4f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Fusion-8B/ac41e588-0664-44f5-9fa9-eafd6508078b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Fusion-8B/1762652580.23267", - "retrieved_timestamp": "1762652580.232671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-Fusion-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-Fusion-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.4417623018036587 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5405890148943007 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1351963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42766666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859707446808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-8B/eb68e0e3-1e39-4779-bc99-4e1825d9c602.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-8B/eb68e0e3-1e39-4779-bc99-4e1825d9c602.json deleted file mode 100644 index 20fe2b34b78554e690e44c1c2aac53eeca0f295c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-8B/eb68e0e3-1e39-4779-bc99-4e1825d9c602.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-8B/1762652580.233048", - "retrieved_timestamp": "1762652580.2330492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34052092891306174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5195634214282913 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.08836858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4301145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646941489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-light-8B/0d2e1c3f-8ee6-44b0-912a-452e2a5a6da7.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-light-8B/0d2e1c3f-8ee6-44b0-912a-452e2a5a6da7.json deleted file mode 100644 index 542c718bbae9f2aa2c4206c8606fc1e9f19c1bb6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-light-8B/0d2e1c3f-8ee6-44b0-912a-452e2a5a6da7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-light-8B/1762652580.233289", - "retrieved_timestamp": "1762652580.23329", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-light-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-light-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38238651223198894 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5271029575696119 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42490625 - } - }, - 
{ - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3781582446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v23-8B/5d5ae047-72d1-4083-8e28-dcce7337ed25.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v23-8B/5d5ae047-72d1-4083-8e28-dcce7337ed25.json deleted file mode 100644 index 0921bdf147d4b3bbb662ad72d63df8c1f8d22be3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v23-8B/5d5ae047-72d1-4083-8e28-dcce7337ed25.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v23-8B/1762652580.233495", - "retrieved_timestamp": "1762652580.233495", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v23-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v23-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040933611705829 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5289840558524612 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43684375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37059507978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v24-8B/e6b62da0-ad6d-431c-8a0e-185c6eddf3da.json 
b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v24-8B/e6b62da0-ad6d-431c-8a0e-185c6eddf3da.json deleted file mode 100644 index 0ee959c8cc338b186ff2be0d4cc70f322970248a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v24-8B/e6b62da0-ad6d-431c-8a0e-185c6eddf3da.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v24-8B/1762652580.233697", - "retrieved_timestamp": "1762652580.2336981", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v24-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v24-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42587556572117535 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5276140433113651 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42903125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3779089095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v25-8B/81c8704c-7124-42d1-b320-77e31e35898b.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v25-8B/81c8704c-7124-42d1-b320-77e31e35898b.json deleted file mode 100644 index 7b383aefcd890010eb1b9f1b9f5b98b266e77332..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v25-8B/81c8704c-7124-42d1-b320-77e31e35898b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v25-8B/1762652580.23391", - "retrieved_timestamp": "1762652580.23391", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v25-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v25-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4420869958377106 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5290702582598797 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37159242021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v26-8B/6705072a-5a46-49ae-925f-1cf7da1ea288.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v26-8B/6705072a-5a46-49ae-925f-1cf7da1ea288.json deleted file mode 100644 index 9302dea4debceb31af5d18b92a6e76f1d3ccd769..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v26-8B/6705072a-5a46-49ae-925f-1cf7da1ea288.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v26-8B/1762652580.234126", - "retrieved_timestamp": "1762652580.234127", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v26-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v26-8B" - }, - "evaluation_results": [ - 
{ - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4413877400851962 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5271171047819411 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4263645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793218085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v27-8B/d3dcd3f0-2f43-4b82-ba29-77a69a9b3e8f.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v27-8B/d3dcd3f0-2f43-4b82-ba29-77a69a9b3e8f.json deleted file mode 100644 index 337938bd665a4e42d28e2a72e40dddccd9d16a0b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v27-8B/d3dcd3f0-2f43-4b82-ba29-77a69a9b3e8f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v27-8B/1762652580.2343428", - "retrieved_timestamp": "1762652580.234344", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v27-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v27-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4378404854674486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5290320010579407 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37549867021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v28-8B/e2aa230d-452e-42f0-a780-af255c62120e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v28-8B/e2aa230d-452e-42f0-a780-af255c62120e.json deleted file mode 100644 index 3631eec91bc9aecdfa45981f4233bb5f1a836b63..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v28-8B/e2aa230d-452e-42f0-a780-af255c62120e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v28-8B/1762652580.234553", - "retrieved_timestamp": "1762652580.234553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v28-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v28-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43659157701565177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294743678489208 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43296874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v29-8B/86e94a19-e497-4539-802b-597ce0e0ced0.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v29-8B/86e94a19-e497-4539-802b-597ce0e0ced0.json deleted file mode 100644 index df7c8e8b8fbbb10dfbb3638a298022339348698b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v29-8B/86e94a19-e497-4539-802b-597ce0e0ced0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v29-8B/1762652580.234771", - "retrieved_timestamp": "1762652580.234771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v29-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v29-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4487315877427448 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5275189525290296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42366666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37649601063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v30-8B/320c581d-f667-4dab-a32c-bb9f2621e84d.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v30-8B/320c581d-f667-4dab-a32c-bb9f2621e84d.json deleted file mode 100644 index 6a5ad8efcfd630a8fd0921f824d7cdc2bbf33fc4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v30-8B/320c581d-f667-4dab-a32c-bb9f2621e84d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v30-8B/1762652580.2349901", - "retrieved_timestamp": "1762652580.234991", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v30-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v30-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42947268802333366 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5327819889174134 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4263333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3937832446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v31-8B/0757cecd-bc5f-4095-90ee-25920ae6670c.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v31-8B/0757cecd-bc5f-4095-90ee-25920ae6670c.json deleted file mode 100644 index 51f26550d589f666b5031232cc634319b441476a..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v31-8B/0757cecd-bc5f-4095-90ee-25920ae6670c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v31-8B/1762652580.235214", - "retrieved_timestamp": "1762652580.235214", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v31-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v31-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43986400528375824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5315048053167004 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42506249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39345079787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v32-8B/f58f0ecc-a059-448d-a2f9-e36b601e2154.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v32-8B/f58f0ecc-a059-448d-a2f9-e36b601e2154.json deleted file mode 100644 index a49b52f280517949b17bf57ac939ae3efc8e1a2a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v32-8B/f58f0ecc-a059-448d-a2f9-e36b601e2154.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v32-8B/1762652580.235436", - "retrieved_timestamp": "1762652580.2354372", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v32-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v32-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4487315877427448 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5292530349260334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42106249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776595744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v33-8B/2436838e-2b6a-4c1e-b8c2-ec505d9a4c34.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v33-8B/2436838e-2b6a-4c1e-b8c2-ec505d9a4c34.json deleted file mode 100644 index cb329fb5c0be9aa6ba451a94c1107b51eddfe15c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v33-8B/2436838e-2b6a-4c1e-b8c2-ec505d9a4c34.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v33-8B/1762652580.23565", - "retrieved_timestamp": "1762652580.235651", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v33-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v33-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4301719437758481 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5321153222507468 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41839583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.390874335106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v34-8B/11486e0e-a9e3-43b0-b26e-299a86555d16.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v34-8B/11486e0e-a9e3-43b0-b26e-299a86555d16.json deleted file mode 100644 index e40167abec9df18e7f846e72a85f3582817f2664..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v34-8B/11486e0e-a9e3-43b0-b26e-299a86555d16.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v34-8B/1762652580.235871", - "retrieved_timestamp": "1762652580.235871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-PRP-v34-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-PRP-v34-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45625052638111324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.533301459442271 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42372916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3927027925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-8B/75037d12-da94-4c55-8de5-a7cef098d4b0.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-8B/75037d12-da94-4c55-8de5-a7cef098d4b0.json deleted file mode 100644 index 5ae8689263e0c496de510c725d044bc9ea373ace..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-8B/75037d12-da94-4c55-8de5-a7cef098d4b0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-8B/1762652580.236081", - "retrieved_timestamp": "1762652580.2360818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-TSN-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-TSN-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47213726246359655 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5176546480934434 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43290625 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3816489361702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-light-8B/9f0aa20f-8687-4c21-b222-39a322f90842.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-light-8B/9f0aa20f-8687-4c21-b222-39a322f90842.json deleted file mode 100644 index f0271dcdb260bbfdd2043f6cac9d9946002b04c9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-light-8B/9f0aa20f-8687-4c21-b222-39a322f90842.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-light-8B/1762652580.236298", - "retrieved_timestamp": "1762652580.236299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-TSN-light-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-TSN-light-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46849027247702757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5235021286391058 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42893749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38056848404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v19-8B/91c2897a-3ae3-402b-aadf-26d0b8d746c5.json 
b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v19-8B/91c2897a-3ae3-402b-aadf-26d0b8d746c5.json deleted file mode 100644 index aaf716631c0d5602db6f964d1145076697ab19be..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v19-8B/91c2897a-3ae3-402b-aadf-26d0b8d746c5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v19-8B/1762652580.236516", - "retrieved_timestamp": "1762652580.2365172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-TSN-v19-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-TSN-v19-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4563502617499346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316458785173577 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4276979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37898936170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v20-8B/4a60fea6-e0e8-497e-9b29-439e7641e77b.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v20-8B/4a60fea6-e0e8-497e-9b29-439e7641e77b.json deleted file mode 100644 index c2be55d379a6d8afa649278db91108b67a573820..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v20-8B/4a60fea6-e0e8-497e-9b29-439e7641e77b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v20-8B/1762652580.236737", - "retrieved_timestamp": 
"1762652580.236737", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-TSN-v20-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-TSN-v20-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4423119545029411 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5250468078369915 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42103124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39361702127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v21-8B/d9c819c2-a3f6-481e-bd71-47912aef9847.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v21-8B/d9c819c2-a3f6-481e-bd71-47912aef9847.json deleted file mode 100644 index da78a06b0b4ded3b7e7038a15ceb0aadb7fb2511..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v21-8B/d9c819c2-a3f6-481e-bd71-47912aef9847.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v21-8B/1762652580.2369542", - "retrieved_timestamp": "1762652580.236955", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-TSN-v21-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-TSN-v21-8B" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46701640536000033 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.524796520922724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43427083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3816489361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v22-8B/6e20f902-8752-466c-b8d4-34787fb90fce.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v22-8B/6e20f902-8752-466c-b8d4-34787fb90fce.json deleted file mode 100644 index 96e1a4ea24c9b0b8fb624ce083f01db96b04ae75..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v22-8B/6e20f902-8752-466c-b8d4-34787fb90fce.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v22-8B/1762652580.2371762", - "retrieved_timestamp": "1762652580.2371771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-TSN-v22-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-TSN-v22-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4673410993940522 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5245863682593667 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38115026595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-8B/d25510e4-6549-4f64-8ec4-37ac8671050c.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-8B/d25510e4-6549-4f64-8ec4-37ac8671050c.json deleted file mode 100644 index b5033e3b08d1fedba969287bf75674928d215b6f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-8B/d25510e4-6549-4f64-8ec4-37ac8671050c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-8B/1762652580.237391", - "retrieved_timestamp": "1762652580.237392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45722460848326885 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5321936191858193 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", 
- "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4305833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39012632978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-alt-8B/58e279d4-da0f-4e2c-a74d-c51caeaad884.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-alt-8B/58e279d4-da0f-4e2c-a74d-c51caeaad884.json deleted file mode 100644 index a6c082b084dd5d883fba3c2e32227355d99965b3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-alt-8B/58e279d4-da0f-4e2c-a74d-c51caeaad884.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-alt-8B/1762652580.23761", - "retrieved_timestamp": "1762652580.23761", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-alt-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-alt-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4542270065648036 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5297928701221488 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1095166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42921875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3896276595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-light-8B/64c07a98-4f3f-49f7-99de-9963dcfedeba.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-light-8B/64c07a98-4f3f-49f7-99de-9963dcfedeba.json deleted file mode 100644 index 095e335043061fa022b2d1bd0dd20992d4e36b50..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-light-8B/64c07a98-4f3f-49f7-99de-9963dcfedeba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-light-8B/1762652580.237838", - "retrieved_timestamp": "1762652580.2378392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-light-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-light-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45809895521660304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5376138387743472 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42909375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.394281914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-light-alt-8B/abebffbf-48b5-4452-8c7a-bb1175a7e979.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-light-alt-8B/abebffbf-48b5-4452-8c7a-bb1175a7e979.json deleted file mode 100644 index 14284eed254af7e6779e09973818894a4b86d41f..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-light-alt-8B/abebffbf-48b5-4452-8c7a-bb1175a7e979.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-light-alt-8B/1762652580.238084", - "retrieved_timestamp": "1762652580.238085", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-light-alt-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-light-alt-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44535942410581697 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5327145731870764 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43045833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39228723404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-ultra-light-8B/1810feae-7a27-4c17-8174-3cd8a143b21f.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-ultra-light-8B/1810feae-7a27-4c17-8174-3cd8a143b21f.json deleted file mode 100644 index fc4963cae6649ca46b4621668511b7f7a373790d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-ultra-light-8B/1810feae-7a27-4c17-8174-3cd8a143b21f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-ultra-light-8B/1762652580.238316", - "retrieved_timestamp": "1762652580.238317", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4563003940655239 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316344937208096 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4196979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914561170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v13-8B/1fc6ca13-157c-4502-8724-be153afb6347.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v13-8B/1fc6ca13-157c-4502-8724-be153afb6347.json deleted file mode 100644 index e6ad25d6ff6f352bf9ef062f2ca114092aba5bce..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v13-8B/1fc6ca13-157c-4502-8724-be153afb6347.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v13-8B/1762652580.238605", - "retrieved_timestamp": "1762652580.238605", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v13-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-v13-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44286160720222345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359422335881335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42776041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3929521276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v14-8B/c20f5702-24fc-443a-875e-495401776eeb.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v14-8B/c20f5702-24fc-443a-875e-495401776eeb.json deleted file mode 100644 index 938a402fdfa2c8e514d2b94ef5515d2f5b3ae439..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v14-8B/c20f5702-24fc-443a-875e-495401776eeb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v14-8B/1762652580.23884", - "retrieved_timestamp": "1762652580.23884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v14-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-v14-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380155764482684 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5363063034440413 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42772916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3931183510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v15-8B/24e11e0c-fb61-46c1-a05e-c533eb392195.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v15-8B/24e11e0c-fb61-46c1-a05e-c533eb392195.json deleted file mode 100644 index 1578c90b084868f5fee78750786e5434599f8f11..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v15-8B/24e11e0c-fb61-46c1-a05e-c533eb392195.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v15-8B/1762652580.239064", - "retrieved_timestamp": "1762652580.2390652", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v15-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-v15-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4654428028741517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.534326872652317 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42772916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3941156914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v16-8B/15deaa33-87a2-442e-9618-13f5ab6c299e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v16-8B/15deaa33-87a2-442e-9618-13f5ab6c299e.json deleted file mode 100644 index f7e8b818026c25aa3d322cbba5040f03c470e950..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v16-8B/15deaa33-87a2-442e-9618-13f5ab6c299e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v16-8B/1762652580.2392871", - "retrieved_timestamp": "1762652580.239288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v16-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-v16-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4556510059974202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5343925058514598 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4264270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39170545212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No 
newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v17-8B/bd4cc259-d535-437a-afc5-d74a60154b07.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v17-8B/bd4cc259-d535-437a-afc5-d74a60154b07.json deleted file mode 100644 index d7edd8bbb171d6ca28b1a15bbdee1f2239338b2f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v17-8B/bd4cc259-d535-437a-afc5-d74a60154b07.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v17-8B/1762652580.239734", - "retrieved_timestamp": "1762652580.239739", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v17-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-v17-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4462337708391512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5346666279815969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42906249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39228723404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v18-8B/aadb6262-4f31-4681-983c-0d19e8bbc5cd.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v18-8B/aadb6262-4f31-4681-983c-0d19e8bbc5cd.json deleted file mode 100644 index 0455e03c5fe507235f4ffde056880978a40fd096..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v18-8B/aadb6262-4f31-4681-983c-0d19e8bbc5cd.json +++ /dev/null @@ 
-1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v18-8B/1762652580.240138", - "retrieved_timestamp": "1762652580.240139", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-gamma-v18-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-gamma-v18-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43409376011205825 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5339179190615058 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4316979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3904587765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-immersive-sof-v44-8B/41e3ecda-8988-456c-b413-19770e2f05c7.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-immersive-sof-v44-8B/41e3ecda-8988-456c-b413-19770e2f05c7.json deleted file mode 100644 index d25d470f1c7c474a04a787c7851e8aa8624007bd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-immersive-sof-v44-8B/41e3ecda-8988-456c-b413-19770e2f05c7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-immersive-sof-v44-8B/1762652580.2404292", - "retrieved_timestamp": "1762652580.24043", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44078821970150317 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5214884907801955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4143958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3887965425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v10-8B/c57d95da-1b6f-4ce7-8c42-f1129fc1e55e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v10-8B/c57d95da-1b6f-4ce7-8c42-f1129fc1e55e.json deleted file mode 100644 index f9fd412487e7b424e9f7e6c18dcbd2e89067d190..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v10-8B/c57d95da-1b6f-4ce7-8c42-f1129fc1e55e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v10-8B/1762652580.2406652", - "retrieved_timestamp": "1762652580.2406662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v10-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v10-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4261503920708165 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5375875314179012 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4223645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38314494680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v11-8B/9a6b85d5-bb26-4832-915e-8b1ac90b0793.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v11-8B/9a6b85d5-bb26-4832-915e-8b1ac90b0793.json deleted file mode 100644 index af34a68ea3351a4dac508a472cee353980554daf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v11-8B/9a6b85d5-bb26-4832-915e-8b1ac90b0793.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v11-8B/1762652580.240909", - "retrieved_timestamp": "1762652580.24091", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v11-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v11-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44263664853699297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359208647512345 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3835605053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v12-8B/4bcdbab0-7220-40bb-832f-01003f59da0f.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v12-8B/4bcdbab0-7220-40bb-832f-01003f59da0f.json deleted file mode 100644 index 53dba018158342a64b70af9e2b3439f0090ea6d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v12-8B/4bcdbab0-7220-40bb-832f-01003f59da0f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v12-8B/1762652580.2411451", - "retrieved_timestamp": "1762652580.241146", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v12-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v12-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43779061778303796 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5348808250181011 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42106249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3835605053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v2-8B/8f16aed2-8b31-48cc-b874-8d437f26f3db.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v2-8B/8f16aed2-8b31-48cc-b874-8d437f26f3db.json deleted file mode 100644 index c024ad49ac6f912e551c248f22b70313aa531a4a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v2-8B/8f16aed2-8b31-48cc-b874-8d437f26f3db.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v2-8B/1762652580.241379", - "retrieved_timestamp": "1762652580.2413802", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v2-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v2-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395891789341171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5341160060985229 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42106249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3826462765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v3-8B/262a66ee-04e4-49d5-8bb2-efe0a93801ad.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v3-8B/262a66ee-04e4-49d5-8bb2-efe0a93801ad.json deleted file mode 100644 index 
f726072b8cc7f9f4c9e7e6830de3d4b9a1ab600f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v3-8B/262a66ee-04e4-49d5-8bb2-efe0a93801ad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v3-8B/1762652580.241601", - "retrieved_timestamp": "1762652580.241602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v3-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4410630460511443 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5330987974156178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4223958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38214760638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v4-8B/fd2a2a9c-639f-4348-9861-00271ed070b2.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v4-8B/fd2a2a9c-639f-4348-9861-00271ed070b2.json deleted file mode 100644 index d52230f9c93306ffbf3f96ee132822922d483da6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v4-8B/fd2a2a9c-639f-4348-9861-00271ed070b2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v4-8B/1762652580.241815", - "retrieved_timestamp": "1762652580.241816", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v4-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v4-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4289230353240513 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5336560458316563 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41972916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38173204787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v5-8B/53c89eb1-49ab-4e5f-b1ad-d8e80045a292.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v5-8B/53c89eb1-49ab-4e5f-b1ad-d8e80045a292.json deleted file mode 100644 index 70cffc61938a91c6515e927b568af49021a664ac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v5-8B/53c89eb1-49ab-4e5f-b1ad-d8e80045a292.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v5-8B/1762652580.2420359", - "retrieved_timestamp": "1762652580.2420359", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v5-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v5-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.44595894448951 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5344958011609363 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4223958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3820644946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v6-8B/c0cc1ad5-9e53-45ac-becb-f8ce3e5ac631.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v6-8B/c0cc1ad5-9e53-45ac-becb-f8ce3e5ac631.json deleted file mode 100644 index f21fb54a4f8b51e874705e8d22abfec7d689ee67..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v6-8B/c0cc1ad5-9e53-45ac-becb-f8ce3e5ac631.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v6-8B/1762652580.242274", - "retrieved_timestamp": "1762652580.242275", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v6-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v6-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395891789341171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5379609044843678 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - 
} - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3820644946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v7-8B/798c2f08-e10b-4115-bdd5-0d6053d03b60.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v7-8B/798c2f08-e10b-4115-bdd5-0d6053d03b60.json deleted file mode 100644 index 57bee684b0fae83ec1c9e112a80360dc00f7dbee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v7-8B/798c2f08-e10b-4115-bdd5-0d6053d03b60.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v7-8B/1762652580.242492", - "retrieved_timestamp": "1762652580.242493", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v7-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v7-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4276741268722545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5334882804815716 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41709375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3835605053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v8-8B/388ef85a-db27-4851-9e6e-2002a75bc6c7.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v8-8B/388ef85a-db27-4851-9e6e-2002a75bc6c7.json deleted file mode 100644 index 3757ac2f7c2a358605ccbe65e3a31cc9f706cf25..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v8-8B/388ef85a-db27-4851-9e6e-2002a75bc6c7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v8-8B/1762652580.242712", - "retrieved_timestamp": "1762652580.242713", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v8-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v8-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43834027048232027 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359208647512345 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42103124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38272938829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v9-8B/cd0c4096-93ee-4a04-83b0-44063770e81b.json 
b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v9-8B/cd0c4096-93ee-4a04-83b0-44063770e81b.json deleted file mode 100644 index 9890fd64a8dec33896453895c2f3321a99820cbe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v9-8B/cd0c4096-93ee-4a04-83b0-44063770e81b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v9-8B/1762652580.242934", - "retrieved_timestamp": "1762652580.242935", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v9-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v9-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43686640336529303 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5360680608930435 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4183958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3819813829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v9-TitanFusion-Mix-8B/69f3e2b2-8918-41a8-abc6-c84c3d674f94.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v9-TitanFusion-Mix-8B/69f3e2b2-8918-41a8-abc6-c84c3d674f94.json deleted file mode 100644 index 34b1d850b0803777587d7eddb8e9133da5d51f17..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v9-TitanFusion-Mix-8B/69f3e2b2-8918-41a8-abc6-c84c3d674f94.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v9-TitanFusion-Mix-8B/1762652580.243146", - 
"retrieved_timestamp": "1762652580.243147", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.428373382624769 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5539931244833417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43544791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3836436170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-8b/60d775f1-47a9-45ae-9b2f-75b95c9d96cd.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-8b/60d775f1-47a9-45ae-9b2f-75b95c9d96cd.json deleted file mode 100644 index e45486c4447ed44a38ff917f3b3a0929c7d5ab27..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-8b/60d775f1-47a9-45ae-9b2f-75b95c9d96cd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-8b/1762652580.243371", - "retrieved_timestamp": "1762652580.243371", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-Elusive-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": 
"jaspionjader/Kosmos-Elusive-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41688275996577967 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5338593917060857 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3759973404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-8B/d3af54be-9d9a-4a4a-b03e-3468a801795e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-8B/d3af54be-9d9a-4a4a-b03e-3468a801795e.json deleted file mode 100644 index c9dad4af5538793def650e9165370153ef2eb8c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-8B/d3af54be-9d9a-4a4a-b03e-3468a801795e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-VENN-8B/1762652580.243592", - "retrieved_timestamp": "1762652580.243593", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-Elusive-VENN-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-Elusive-VENN-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4232525255211727 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.5355598563659026 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4156979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3797373670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-Asymmetric-8B/e7cf15b2-0347-48a8-bf84-08e27b3688fd.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-Asymmetric-8B/e7cf15b2-0347-48a8-bf84-08e27b3688fd.json deleted file mode 100644 index 3e6d0d1e2eb3295aad638b1822186d2f8e65ab8f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-Asymmetric-8B/e7cf15b2-0347-48a8-bf84-08e27b3688fd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-VENN-Asymmetric-8B/1762652580.243807", - "retrieved_timestamp": "1762652580.243807", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4541771388803929 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5312976840812583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42506249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842253989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-Aurora_faustus-8B/8befbe9f-3ab2-4bc8-bd16-5badd2291d5d.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-Aurora_faustus-8B/8befbe9f-3ab2-4bc8-bd16-5badd2291d5d.json deleted file mode 100644 index 3fef12559b6266b0dc2f9a7ff7eb8a3dd8a6c0ce..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-Aurora_faustus-8B/8befbe9f-3ab2-4bc8-bd16-5badd2291d5d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-VENN-Aurora_faustus-8B/1762652580.244045", - "retrieved_timestamp": "1762652580.244046", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4335441074127758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5303980337010061 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794880319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-VENN-8B/e14cedfb-79a9-446a-ba16-64f378a47b4a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-VENN-8B/e14cedfb-79a9-446a-ba16-64f378a47b4a.json deleted file mode 100644 index b3b63f9ae9045306340b5e18ec1bfa213cf5929f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-VENN-8B/e14cedfb-79a9-446a-ba16-64f378a47b4a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-VENN-8B/1762652580.24428", - "retrieved_timestamp": "1762652580.244281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/Kosmos-VENN-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/Kosmos-VENN-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433219413378724 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5317923607687299 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42109375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800698138297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_PRP-Kosmos-EVAA-8B/84a37d06-2668-4143-8e2f-5a08651f2dfb.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_PRP-Kosmos-EVAA-8B/84a37d06-2668-4143-8e2f-5a08651f2dfb.json deleted file mode 100644 index 
3b5059a71748960e6de1e6fcbd4b55f20dd0cc36..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_PRP-Kosmos-EVAA-8B/84a37d06-2668-4143-8e2f-5a08651f2dfb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_PRP-Kosmos-EVAA-8B/1762652580.244709", - "retrieved_timestamp": "1762652580.24471", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/PRP-Kosmos-EVAA-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/PRP-Kosmos-EVAA-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36327721556580983 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5237421324582278 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3765791223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_PRP-Kosmos-EVAA-light-8B/72c9dcd4-ab00-4f36-a1e6-43e241c8b967.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_PRP-Kosmos-EVAA-light-8B/72c9dcd4-ab00-4f36-a1e6-43e241c8b967.json deleted file mode 100644 index f8c0c2bffe3b64aa3eebe05695ce69e427372fc1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_PRP-Kosmos-EVAA-light-8B/72c9dcd4-ab00-4f36-a1e6-43e241c8b967.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_PRP-Kosmos-EVAA-light-8B/1762652580.2449658", - "retrieved_timestamp": "1762652580.244967", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": 
"HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/PRP-Kosmos-EVAA-light-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/PRP-Kosmos-EVAA-light-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4321201079801593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5274582578494339 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4235416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3631150265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_TSN-Kosmos-EVAA-8B/9819f2bd-8108-4fc5-9208-ce295d860435.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_TSN-Kosmos-EVAA-8B/9819f2bd-8108-4fc5-9208-ce295d860435.json deleted file mode 100644 index 757adba05f253f00943aaed65e873229c99d506f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_TSN-Kosmos-EVAA-8B/9819f2bd-8108-4fc5-9208-ce295d860435.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_TSN-Kosmos-EVAA-8B/1762652580.2451851", - "retrieved_timestamp": "1762652580.245186", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/TSN-Kosmos-EVAA-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/TSN-Kosmos-EVAA-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.49032234471203073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5347376087743225 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14501510574018128 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4173125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.383061835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_TSN-Kosmos-EVAA-v2-8B/2ce2b8e4-0cd4-4001-8790-ad5e26e3e45c.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_TSN-Kosmos-EVAA-v2-8B/2ce2b8e4-0cd4-4001-8790-ad5e26e3e45c.json deleted file mode 100644 index 93618df890894c2eb7eca0d14f8338758150746d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_TSN-Kosmos-EVAA-v2-8B/2ce2b8e4-0cd4-4001-8790-ad5e26e3e45c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_TSN-Kosmos-EVAA-v2-8B/1762652580.2454138", - "retrieved_timestamp": "1762652580.245415", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/TSN-Kosmos-EVAA-v2-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/TSN-Kosmos-EVAA-v2-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46669171132594844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.534342097284994 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41864583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3762466755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-1/b6ca35e1-8680-49e8-a6dd-963214be7411.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-1/b6ca35e1-8680-49e8-a6dd-963214be7411.json deleted file mode 100644 index dd316fe9d0c8ad1431cbd64d6599b92c61717286..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-1/b6ca35e1-8680-49e8-a6dd-963214be7411.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-1/1762652580.2456498", - "retrieved_timestamp": "1762652580.245653", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bbb-1", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bbb-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4864005283758206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5375556962119087 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41706250000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38971077127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-2/155b7412-cc16-45c3-9261-acc9322a0dcc.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-2/155b7412-cc16-45c3-9261-acc9322a0dcc.json deleted file mode 100644 index 786bf64e9de5e4e7fbc54dfb47b8d40d40646ec4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-2/155b7412-cc16-45c3-9261-acc9322a0dcc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-2/1762652580.2460952", - "retrieved_timestamp": "1762652580.2460968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bbb-2", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bbb-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077403511571519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5066789926627318 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.363530585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-3/94668ddb-d2fb-44e2-8ed7-10179d145366.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-3/94668ddb-d2fb-44e2-8ed7-10179d145366.json deleted file mode 100644 index 
d2f7766992adeda3feadce2514fd071c29a49550..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-3/94668ddb-d2fb-44e2-8ed7-10179d145366.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-3/1762652580.24635", - "retrieved_timestamp": "1762652580.246351", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bbb-3", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bbb-3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.416832892281369 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5157831821186084 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1404833836858006 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4264895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38563829787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-4/828a6bd0-a205-4327-bc77-2e8a84c0b69e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-4/828a6bd0-a205-4327-bc77-2e8a84c0b69e.json deleted file mode 100644 index 807a6673f346f750f1509cbc1c6caacb7bb3b8fd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-4/828a6bd0-a205-4327-bc77-2e8a84c0b69e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-4/1762652580.2465842", - "retrieved_timestamp": "1762652580.2465851", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bbb-4", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bbb-4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47675833455232114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.52115051798211 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40924999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3773271276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-5/8c0a66fb-c87d-489d-b071-b4a599562ead.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-5/8c0a66fb-c87d-489d-b071-b4a599562ead.json deleted file mode 100644 index 7245abd9d8df05ae4d4ba74c438e22751bf0a15a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-5/8c0a66fb-c87d-489d-b071-b4a599562ead.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-5/1762652580.2468202", - "retrieved_timestamp": "1762652580.2468212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bbb-5", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bbb-5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4702888336281067 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206902586604485 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3998229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3833942819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-6/ef8025de-fe9f-4a79-97f6-c26c18ab049a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-6/ef8025de-fe9f-4a79-97f6-c26c18ab049a.json deleted file mode 100644 index aa08703012056ae1bcac87fd46d4fa8b4686fe22..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-6/ef8025de-fe9f-4a79-97f6-c26c18ab049a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-6/1762652580.247051", - "retrieved_timestamp": "1762652580.247051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bbb-6", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bbb-6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48797413086166924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5211453749255449 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13897280966767372 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { 
- "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40515625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871343085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-7/a31fbd82-2e21-40e7-a73a-c6351c80bae7.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-7/a31fbd82-2e21-40e7-a73a-c6351c80bae7.json deleted file mode 100644 index 1c2bcd4025f48827432767dda3532526e9abb895..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-7/a31fbd82-2e21-40e7-a73a-c6351c80bae7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-7/1762652580.2473001", - "retrieved_timestamp": "1762652580.247304", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bbb-7", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bbb-7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48280340607366234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5211089947725771 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4038229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859707446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No 
newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-1/15ec7997-1333-43c6-869a-ce4589af56d1.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-1/15ec7997-1333-43c6-869a-ce4589af56d1.json deleted file mode 100644 index f27ac48084243aee73f049ac0c57c270677c1c84..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-1/15ec7997-1333-43c6-869a-ce4589af56d1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-1/1762652580.2475939", - "retrieved_timestamp": "1762652580.247595", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-1", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42842325030917966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5890155164168736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4441041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3449135638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-10/86411dbb-e28b-4e9d-856e-fcc001252fbe.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-10/86411dbb-e28b-4e9d-856e-fcc001252fbe.json deleted file mode 100644 index 8867badf1dc87b7b74529b8cce9e5581a07a7e12..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-10/86411dbb-e28b-4e9d-856e-fcc001252fbe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-10/1762652580.247846", - "retrieved_timestamp": "1762652580.2478468", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-10", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-10" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46184568057199343 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5856025427339699 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41985416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37076130319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-11/804f4be8-a8a9-473f-a898-d71b742a62eb.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-11/804f4be8-a8a9-473f-a898-d71b742a62eb.json deleted file mode 100644 index aea58f2124a4f9ac1732a5b6eebc3154fa203fe4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-11/804f4be8-a8a9-473f-a898-d71b742a62eb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-11/1762652580.2481", - "retrieved_timestamp": "1762652580.2481012", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-11", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-11" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45754930251732073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5851155912628809 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4145520833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3738364361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-12/736ee66e-bd19-4275-afaf-73c2112c2fbd.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-12/736ee66e-bd19-4275-afaf-73c2112c2fbd.json deleted file mode 100644 index 165d0a1a8261b5b75704c982816e1f6f35fe71d7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-12/736ee66e-bd19-4275-afaf-73c2112c2fbd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-12/1762652580.248367", - "retrieved_timestamp": "1762652580.248368", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-12", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-12" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47338617091539337 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5802489392471556 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 
- } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37367021276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-13/da5a3c32-371f-44e5-89a7-c9ba6e98664e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-13/da5a3c32-371f-44e5-89a7-c9ba6e98664e.json deleted file mode 100644 index 2b65c2a3b090aa859ab0d576ece775313ce12d98..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-13/da5a3c32-371f-44e5-89a7-c9ba6e98664e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-13/1762652580.248588", - "retrieved_timestamp": "1762652580.248588", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-13", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-13" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4697890486132351 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5777886799254942 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41585416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37300531914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-15/af3bd92d-45f5-4a48-89aa-b8c956209d5a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-15/af3bd92d-45f5-4a48-89aa-b8c956209d5a.json deleted file mode 100644 index 05607539ab60e285f95704a581bd273a237326ce..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-15/af3bd92d-45f5-4a48-89aa-b8c956209d5a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-15/1762652580.248791", - "retrieved_timestamp": "1762652580.2487922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-15", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-15" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47453534399836883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5818643001829722 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4105208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37666223404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-16/c98928d3-0d7f-429c-927c-bf8fa432101a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-16/c98928d3-0d7f-429c-927c-bf8fa432101a.json deleted file mode 100644 index be97e7cb59a8ffacb8db51eb4e8187acad70048d..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-16/c98928d3-0d7f-429c-927c-bf8fa432101a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-16/1762652580.2489972", - "retrieved_timestamp": "1762652580.248998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-16", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-16" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4730614768813415 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5783335636603978 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4158541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37757646276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-17/787d8040-25c8-4893-b140-cf041260d767.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-17/787d8040-25c8-4893-b140-cf041260d767.json deleted file mode 100644 index 1a41644f39e0a9a838f6797e271ead0b31eb0a8b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-17/787d8040-25c8-4893-b140-cf041260d767.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-17/1762652580.249204", - "retrieved_timestamp": "1762652580.2492049", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - 
"name": "jaspionjader/bh-17", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-17" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4721871301480073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5776302177859685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41582291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37566489361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-18/6aad7ade-7bd0-4515-b4ac-2299c58da098.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-18/6aad7ade-7bd0-4515-b4ac-2299c58da098.json deleted file mode 100644 index 0ccf4a78410087cceb1835187efbd4ebed296219..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-18/6aad7ade-7bd0-4515-b4ac-2299c58da098.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-18/1762652580.249514", - "retrieved_timestamp": "1762652580.249515", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-18", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-18" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47246195649764844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.5823837707078298 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37566489361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-19/81914fd7-1410-4b80-9be9-6ebfbb664613.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-19/81914fd7-1410-4b80-9be9-6ebfbb664613.json deleted file mode 100644 index b85692f950e4b70aaca7728d46c268695b69279c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-19/81914fd7-1410-4b80-9be9-6ebfbb664613.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-19/1762652580.249828", - "retrieved_timestamp": "1762652580.249829", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-19", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-19" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45842364925065493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5765774285787187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3774933510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-2/3e4b8dcc-9270-4b14-952f-c6b96ee8ce57.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-2/3e4b8dcc-9270-4b14-952f-c6b96ee8ce57.json deleted file mode 100644 index e2e90f5bd9eae2f3fadac0592bcbfd94bfd0b8f8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-2/3e4b8dcc-9270-4b14-952f-c6b96ee8ce57.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-2/1762652580.250077", - "retrieved_timestamp": "1762652580.250078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-2", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45792386423578324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5937358907182445 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41864583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3695146276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-20/cfe4ab09-c772-4617-88b6-77e49553605b.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-20/cfe4ab09-c772-4617-88b6-77e49553605b.json deleted file mode 100644 index 8d19b38f31fe53256438f6f3bc814b321fde45d9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-20/cfe4ab09-c772-4617-88b6-77e49553605b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-20/1762652580.2503", - "retrieved_timestamp": "1762652580.2503", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-20", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-20" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4727367828472896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.574973333640619 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4105208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768284574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-21/a369ff4f-7fe9-4764-be74-83563dbaf635.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-21/a369ff4f-7fe9-4764-be74-83563dbaf635.json deleted file mode 100644 index b5e7c12b59330d1231ff7fcd93d3157707e92672..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-21/a369ff4f-7fe9-4764-be74-83563dbaf635.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-21/1762652580.25052", - "retrieved_timestamp": "1762652580.2505212", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-21", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-21" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47001400727846554 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5738369241857685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4157916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37757646276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-22/f3815ff9-c1bd-4706-a770-4c0b0e8c5d13.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-22/f3815ff9-c1bd-4706-a770-4c0b0e8c5d13.json deleted file mode 100644 index 46623a261013ba54c825a336858b00aebbc68c1a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-22/f3815ff9-c1bd-4706-a770-4c0b0e8c5d13.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-22/1762652580.250869", - "retrieved_timestamp": "1762652580.25087", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-22", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-22" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45999725173650363 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.579296884452635 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41715625000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3764128989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-23/f4db95ae-8e3d-45ed-9c53-3b30fde0cb3e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-23/f4db95ae-8e3d-45ed-9c53-3b30fde0cb3e.json deleted file mode 100644 index 0804217926b265ef06ca1310af38e9b485bdc74f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-23/f4db95ae-8e3d-45ed-9c53-3b30fde0cb3e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-23/1762652580.2511601", - "retrieved_timestamp": "1762652580.251161", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-23", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-23" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46576749690820357 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.570027700842045 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37957114361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-24/0b27b829-6588-4f7b-80fe-6e6767287a38.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-24/0b27b829-6588-4f7b-80fe-6e6767287a38.json deleted file mode 100644 index cf5fba37d34a5ba7efb24f09946c2b12167df332..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-24/0b27b829-6588-4f7b-80fe-6e6767287a38.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-24/1762652580.251392", - "retrieved_timestamp": "1762652580.251392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-24", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-24" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4715377420799035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5716684749879075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4157604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38090093085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-25/a0c16d3d-e3f2-4c50-975a-70b69824b3d5.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-25/a0c16d3d-e3f2-4c50-975a-70b69824b3d5.json deleted file mode 100644 index 355db32fe83ce74a1bbbc5198344efa2bf78664d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-25/a0c16d3d-e3f2-4c50-975a-70b69824b3d5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-25/1762652580.251633", - "retrieved_timestamp": "1762652580.251633", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-25", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-25" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47518473206647255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5705628020556314 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4117916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37824135638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-26/0218b7de-bbd7-4196-8fec-3f6fb790a3c1.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-26/0218b7de-bbd7-4196-8fec-3f6fb790a3c1.json deleted file mode 100644 index 9ab8a4a072fb3ce72be6ffa4ac22dcf1b09705d9..0000000000000000000000000000000000000000 --- 
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-26/1762652580.251851",
-    "retrieved_timestamp": "1762652580.251852",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-26",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-26"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4690897928607206
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5734958656360526
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.1163141993957704
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.29949664429530204
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4276979166666666
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3771609042553192
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-27/68435a43-944b-4c66-979b-eb48f7a8e77a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-27/68435a43-944b-4c66-979b-eb48f7a8e77a.json
deleted file mode 100644
index 5ef6ecb91871e777f1a4cfc56c677387fe4eb318..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-27/68435a43-944b-4c66-979b-eb48f7a8e77a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-27/1762652580.2520802",
-    "retrieved_timestamp": "1762652580.252081",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-27",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-27"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4818791916559174
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.571405917910282
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12764350453172205
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2961409395973154
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.409125
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3799035904255319
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-28/0dc95982-e5b0-4011-9e5b-48af7e3048f0.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-28/0dc95982-e5b0-4011-9e5b-48af7e3048f0.json
deleted file mode 100644
index 3050d6e3ca49818ff0c1468dfa91f85a239b84e8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-28/0dc95982-e5b0-4011-9e5b-48af7e3048f0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-28/1762652580.252297",
-    "retrieved_timestamp": "1762652580.2522979",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-28",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-28"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4785070280189896
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5702617832390487
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12311178247734139
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2986577181208054
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.413125
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3812333776595745
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-29/012eeeed-c556-460d-82f6-34bdc31da5cf.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-29/012eeeed-c556-460d-82f6-34bdc31da5cf.json
deleted file mode 100644
index 2b6817e50f5554237bd1a59c1b38c9f89e40546f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-29/012eeeed-c556-460d-82f6-34bdc31da5cf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-29/1762652580.252519",
-    "retrieved_timestamp": "1762652580.2525198",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-29",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-29"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.46881496651107946
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5670161357895335
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12084592145015106
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2953020134228188
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4236979166666666
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.38189827127659576
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-3/37e59290-b4ea-4a44-bfb0-cdbe781c4d7f.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-3/37e59290-b4ea-4a44-bfb0-cdbe781c4d7f.json
deleted file mode 100644
index 94d8d7c62905a55738ca8fa528b63d217f97c67f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-3/37e59290-b4ea-4a44-bfb0-cdbe781c4d7f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-3/1762652580.2527301",
-    "retrieved_timestamp": "1762652580.2527308",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-3",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-3"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4663670172918966
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5890722855221537
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.1148036253776435
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.30201342281879195
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.41728125
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.37017952127659576
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-30/6d3a64df-5ebb-4cd8-bd6c-de799d185fe1.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-30/6d3a64df-5ebb-4cd8-bd6c-de799d185fe1.json
deleted file mode 100644
index 3525e43b4d79672b03c2439b174b7b8293dbbf16..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-30/6d3a64df-5ebb-4cd8-bd6c-de799d185fe1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-30/1762652580.252943",
-    "retrieved_timestamp": "1762652580.2529438",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-30",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-30"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.46664184364153777
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5705838505746653
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12311178247734139
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.29278523489932884
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4144270833333334
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3781582446808511
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-31/a637936e-646b-4c21-964a-61e253fd3705.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-31/a637936e-646b-4c21-964a-61e253fd3705.json
deleted file mode 100644
index b4745b2904324fa2b1b2ac65f301eabba14f66e6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-31/a637936e-646b-4c21-964a-61e253fd3705.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-31/1762652580.253162",
-    "retrieved_timestamp": "1762652580.253163",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-31",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-31"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4727367828472896
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5665082303171874
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.1283987915407855
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2936241610738255
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4104270833333334
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3819813829787234
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-32/a56c62cc-c318-4de4-b6c7-0fa10229a127.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-32/a56c62cc-c318-4de4-b6c7-0fa10229a127.json
deleted file mode 100644
index 5bb64bb99c85880d28de553a259b954bb585528a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-32/a56c62cc-c318-4de4-b6c7-0fa10229a127.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-32/1762652580.253373",
-    "retrieved_timestamp": "1762652580.2533739",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-32",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-32"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4635943740386619
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5662056335064284
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12462235649546828
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.29697986577181207
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4157291666666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3812333776595745
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-33/bcab8546-ea69-4207-b69b-ab982b603e55.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-33/bcab8546-ea69-4207-b69b-ab982b603e55.json
deleted file mode 100644
index 7dcf2f445fda1d12dca55e2d2c639321934f53c3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-33/bcab8546-ea69-4207-b69b-ab982b603e55.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-33/1762652580.25359",
-    "retrieved_timestamp": "1762652580.253591",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-33",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-33"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4685401401614383
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5652966799156172
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.11782477341389729
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2961409395973154
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4156979166666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.38081781914893614
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-34/6097086b-8c8b-493e-af1a-71146a2ed566.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-34/6097086b-8c8b-493e-af1a-71146a2ed566.json
deleted file mode 100644
index 365e7bdc660bd5010d818e2c167e2c0561f31bfe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-34/6097086b-8c8b-493e-af1a-71146a2ed566.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-34/1762652580.253809",
-    "retrieved_timestamp": "1762652580.25381",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-34",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-34"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4623953332712758
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5681235912530039
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12084592145015106
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.29194630872483224
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4184583333333333
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.38040226063829785
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-35/7166192e-42b0-4990-8218-88bb38fd1bdb.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-35/7166192e-42b0-4990-8218-88bb38fd1bdb.json
deleted file mode 100644
index f54e357ef51c95435590be0d1b233719a715ca32..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-35/7166192e-42b0-4990-8218-88bb38fd1bdb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-35/1762652580.2540212",
-    "retrieved_timestamp": "1762652580.254022",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-35",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-35"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.47213726246359655
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5639648300586834
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12462235649546828
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2953020134228188
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.41830208333333335
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3829787234042553
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-36/3a4f8c97-9f30-44b8-8f79-7f19f90a08d1.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-36/3a4f8c97-9f30-44b8-8f79-7f19f90a08d1.json
deleted file mode 100644
index 556b75506bfad31ca063fba25246cdcaab854877..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-36/3a4f8c97-9f30-44b8-8f79-7f19f90a08d1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-36/1762652580.2542279",
-    "retrieved_timestamp": "1762652580.254229",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-36",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-36"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4665919759571271
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5664445599052024
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12386706948640483
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.30201342281879195
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4196354166666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.383061835106383
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-37/19490f78-486d-4325-b31e-af8555c32ea9.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-37/19490f78-486d-4325-b31e-af8555c32ea9.json
deleted file mode 100644
index 283c19ddd0e0e3d6c9266b7782a70ba158a556b1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-37/19490f78-486d-4325-b31e-af8555c32ea9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-37/1762652580.2544441",
-    "retrieved_timestamp": "1762652580.254445",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-37",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-37"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.48797413086166924
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.562488460737535
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.1216012084592145
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.29446308724832215
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4156354166666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3828125
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-38/61e7c49e-abb9-4e38-ba3f-1018db104d83.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-38/61e7c49e-abb9-4e38-ba3f-1018db104d83.json
deleted file mode 100644
index 46a5c321bfc3d60bf3a166ca8262034b0213c864..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-38/61e7c49e-abb9-4e38-ba3f-1018db104d83.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-38/1762652580.2548852",
-    "retrieved_timestamp": "1762652580.2548869",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-38",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-38"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.46179581288758276
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5658176339168742
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12386706948640483
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2978187919463087
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4117291666666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3810671542553192
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-39/243e6b7b-a34f-44cd-b027-176f877ff8e7.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-39/243e6b7b-a34f-44cd-b027-176f877ff8e7.json
deleted file mode 100644
index 64945462b2e491db1a861ad18181a0542900f839..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-39/243e6b7b-a34f-44cd-b027-176f877ff8e7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-39/1762652580.2552152",
-    "retrieved_timestamp": "1762652580.2552161",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-39",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-39"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.45759917020173135
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5633012248625926
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12537764350453173
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.30033557046979864
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4262395833333334
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.38314494680851063
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-4/85ba493b-05f1-4853-a0ff-44570a7c2a82.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-4/85ba493b-05f1-4853-a0ff-44570a7c2a82.json
deleted file mode 100644
index 68408fe157f9c2039dbfafe1eb5b9e86029350ff..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-4/85ba493b-05f1-4853-a0ff-44570a7c2a82.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-4/1762652580.2554429",
-    "retrieved_timestamp": "1762652580.255444",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-4",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-4"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4672912317096415
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5892000111391051
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.1095166163141994
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2961409395973154
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.41728125
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3705119680851064
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-40/56837896-11a6-458b-a17e-9540ab5ae66a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-40/56837896-11a6-458b-a17e-9540ab5ae66a.json
deleted file mode 100644
index efcca4c37c4957e40a9c02f0260291cc87a368a0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-40/56837896-11a6-458b-a17e-9540ab5ae66a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-40/1762652580.2556531",
-    "retrieved_timestamp": "1762652580.2556539",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-40",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-40"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.45357761849669986
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5633956317971519
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12462235649546828
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2986577181208054
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4236041666666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.38347739361702127
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-41/db0c4182-7391-40e7-ad6e-5374c8eb28e1.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-41/db0c4182-7391-40e7-ad6e-5374c8eb28e1.json
deleted file mode 100644
index 4d0c9274fa2bf2e3cb0668db36eb72165745a8a3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-41/db0c4182-7391-40e7-ad6e-5374c8eb28e1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-41/1762652580.2558541",
-    "retrieved_timestamp": "1762652580.2558541",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-41",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-41"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4739856912990864
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.56138466485423
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12537764350453173
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.29278523489932884
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.41827083333333337
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.38248005319148937
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-42/265e3cbb-484f-4cf7-8994-050f414ecf37.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-42/265e3cbb-484f-4cf7-8994-050f414ecf37.json
deleted file mode 100644
index d9b5b051020ebafa7c0ba1644b4792b6b9e02777..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-42/265e3cbb-484f-4cf7-8994-050f414ecf37.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-42/1762652580.25606",
-    "retrieved_timestamp": "1762652580.2560608",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-42",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-42"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4660423232578447
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5645607204696422
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.1268882175226586
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2961409395973154
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.42100000000000004
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3812333776595745
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-43/472b725a-2bd5-440a-9768-ba8db6fe6b34.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-43/472b725a-2bd5-440a-9768-ba8db6fe6b34.json
deleted file mode 100644
index 7e6657ac73eceb18070b9814dfaba4acf09c8afa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-43/472b725a-2bd5-440a-9768-ba8db6fe6b34.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-43/1762652580.2562718",
-    "retrieved_timestamp": "1762652580.2562718",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-43",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-43"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.45999725173650363
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5635240412618795
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12386706948640483
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.29446308724832215
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4156041666666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3819813829787234
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-44/60c18178-ff40-4e9d-9683-077cc2fa254e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-44/60c18178-ff40-4e9d-9683-077cc2fa254e.json
deleted file mode 100644
index 93baeba8e3cf95e72861220647fb052436e69d92..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-44/60c18178-ff40-4e9d-9683-077cc2fa254e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-44/1762652580.2565289",
-    "retrieved_timestamp": "1762652580.2565298",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-44",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-44"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4706135276621586
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5642775941837409
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.1216012084592145
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2961409395973154
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.42487500000000006
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3833942819148936
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-46/6b3c3872-cd4d-4827-8651-6baa9d2423e7.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-46/6b3c3872-cd4d-4827-8651-6baa9d2423e7.json
deleted file mode 100644
index 97f840046b868b74ce113d2cd38c3c24b849e5e0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-46/6b3c3872-cd4d-4827-8651-6baa9d2423e7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-46/1762652580.2567308",
-    "retrieved_timestamp": "1762652580.256732",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-46",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-46"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4727367828472896
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5631697539272891
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12764350453172205
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2961409395973154
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4262395833333334
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3822307180851064
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-47/9f30c4d4-4a3c-459e-8444-e143ef75f84e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-47/9f30c4d4-4a3c-459e-8444-e143ef75f84e.json
deleted file mode 100644
index 1f8b48b63f1ea468bcd73942f1802834dd1fb955..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-47/9f30c4d4-4a3c-459e-8444-e143ef75f84e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-47/1762652580.256935",
-    "retrieved_timestamp": "1762652580.2569358",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-47",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-47"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.46516797652451053
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5545716016743777
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12764350453172205
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.29446308724832215
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4156041666666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3854720744680851
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-48/80bbd567-b13e-4ed4-ba85-9098639a3642.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-48/80bbd567-b13e-4ed4-ba85-9098639a3642.json
deleted file mode 100644
index 75e6f76f2f6c9e80bd0345cfd072c6726d900373..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-48/80bbd567-b13e-4ed4-ba85-9098639a3642.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-48/1762652580.257132",
-    "retrieved_timestamp": "1762652580.257133",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-48",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-48"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.46881496651107946
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5541308128775738
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.12537764350453173
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.29446308724832215
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4209375
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3859707446808511
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-49/e574e35a-56cb-471d-b4f1-df0858f5ce66.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-49/e574e35a-56cb-471d-b4f1-df0858f5ce66.json
deleted file mode 100644
index cd1b1654604c7dcf29a86ac3ee500b10c02871cd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-49/e574e35a-56cb-471d-b4f1-df0858f5ce66.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/jaspionjader_bh-49/1762652580.257362",
-    "retrieved_timestamp": "1762652580.257366",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "jaspionjader/bh-49",
-        "developer": "jaspionjader",
-        "inference_platform": "unknown",
-        "id": "jaspionjader/bh-49"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.47246195649764844
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score":
0.5540285004706683 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41290625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38081781914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-5/ec314c97-9bc0-4e14-9d57-d6204e699428.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-5/ec314c97-9bc0-4e14-9d57-d6204e699428.json deleted file mode 100644 index 67cada033819679c5ca9b4b13fe60c9cfc4f3652..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-5/ec314c97-9bc0-4e14-9d57-d6204e699428.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-5/1762652580.2577002", - "retrieved_timestamp": "1762652580.2577012", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-5", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46516797652451053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5881569099353959 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37017952127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-50/980887dd-2948-4e5f-b22c-3cc03057f493.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-50/980887dd-2948-4e5f-b22c-3cc03057f493.json deleted file mode 100644 index da25aafae4f82a3f519abd241a43fd10e0f7488e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-50/980887dd-2948-4e5f-b22c-3cc03057f493.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-50/1762652580.257925", - "retrieved_timestamp": "1762652580.257926", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-50", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-50" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47246195649764844 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.555294802866646 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41687500000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842253989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-51/6d544c96-53c9-43d1-9cb1-6077d7235fff.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-51/6d544c96-53c9-43d1-9cb1-6077d7235fff.json deleted file mode 100644 index 6d089439fcedcebf86bdaaed916cfe546a489f67..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-51/6d544c96-53c9-43d1-9cb1-6077d7235fff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-51/1762652580.2581341", - "retrieved_timestamp": "1762652580.258135", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-51", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-51" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4630447213393795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5557101784534039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41681250000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38314494680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-52/fd3c9666-09bf-4562-b49d-eea905469761.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-52/fd3c9666-09bf-4562-b49d-eea905469761.json deleted file mode 100644 index 38dbecaca4729a6de20ddfd8bd62be918692afdc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-52/fd3c9666-09bf-4562-b49d-eea905469761.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-52/1762652580.258348", - "retrieved_timestamp": "1762652580.258349", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-52", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-52" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45362748618111054 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.544409095161705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41690625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38430851063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-53/978d4a27-17c7-4f87-b3e5-27b00ffa4d80.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-53/978d4a27-17c7-4f87-b3e5-27b00ffa4d80.json deleted file mode 100644 index 67f420e213877de2d0d653d9639f243d4d496172..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-53/978d4a27-17c7-4f87-b3e5-27b00ffa4d80.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-53/1762652580.25855", - "retrieved_timestamp": "1762652580.2585511", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-53", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-53" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4779573753197073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5494367702137035 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29865771812080544 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4196041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38580452127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-54/9a2d7235-84cf-43f6-8855-68d0bf85e6e3.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-54/9a2d7235-84cf-43f6-8855-68d0bf85e6e3.json deleted file mode 100644 index 795a34aea3b7e8ccc207d589b97d926d9125878c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-54/9a2d7235-84cf-43f6-8855-68d0bf85e6e3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-54/1762652580.258788", - "retrieved_timestamp": "1762652580.258792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-54", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-54" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48405231452545916 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5547738488653888 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4155416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38248005319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-55/7c388cc5-fb2f-48ba-967c-a931fcb25a42.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-55/7c388cc5-fb2f-48ba-967c-a931fcb25a42.json deleted file mode 100644 index d2c0d8478e78d3c5616facffd734bca182c63e1c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-55/7c388cc5-fb2f-48ba-967c-a931fcb25a42.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-55/1762652580.259115", - "retrieved_timestamp": "1762652580.259116", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-55", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-55" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47093822169621047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5549641462109072 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42220833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3846409574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-56/348c8f2b-807f-464b-832e-0049f8329b86.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-56/348c8f2b-807f-464b-832e-0049f8329b86.json deleted file mode 100644 index f24b442cb8884e0ac5454c8311486b52cad69c8e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-56/348c8f2b-807f-464b-832e-0049f8329b86.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-56/1762652580.2593641", - "retrieved_timestamp": "1762652580.259365", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-56", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-56" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45999725173650363 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5446903231355648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4116041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3843916223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-57/fab7388c-87ed-4108-ba4d-e1621925f264.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-57/fab7388c-87ed-4108-ba4d-e1621925f264.json deleted file mode 100644 index 7a9352ac7398212308b7014eaa15dae87e8cc754..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-57/fab7388c-87ed-4108-ba4d-e1621925f264.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-57/1762652580.259624", - "retrieved_timestamp": "1762652580.259625", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-57", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-57" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44051339335186196 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5424621834237494 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42103124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3896276595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-58/a9c1b649-8850-43d1-b5db-feefd0b8d0b4.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-58/a9c1b649-8850-43d1-b5db-feefd0b8d0b4.json deleted file mode 100644 index 1b4ed78771a9f54a91bf0aff6afd1f41e5ed8f99..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-58/a9c1b649-8850-43d1-b5db-feefd0b8d0b4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-58/1762652580.259867", - "retrieved_timestamp": "1762652580.259868", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - 
"name": "jaspionjader/bh-58", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-58" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4630447213393795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5446322106157867 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4183333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3896276595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-59/974b1542-8716-4ea3-b097-f9893c9c9656.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-59/974b1542-8716-4ea3-b097-f9893c9c9656.json deleted file mode 100644 index 22679a582820d2c0e0fc3c3b2cb28eb54bcd475b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-59/974b1542-8716-4ea3-b097-f9893c9c9656.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-59/1762652580.260088", - "retrieved_timestamp": "1762652580.2600892", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-59", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-59" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43414362779646887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.5511531646170439 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1540785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41700000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3838098404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-6/e8dfd77c-e2c8-42ef-b341-5476411d038d.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-6/e8dfd77c-e2c8-42ef-b341-5476411d038d.json deleted file mode 100644 index b3acf6dcbb01abbb8e68e3637781d3b47e5bfae9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-6/e8dfd77c-e2c8-42ef-b341-5476411d038d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-6/1762652580.260308", - "retrieved_timestamp": "1762652580.260309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-6", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620706392372239 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5890658635262072 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on 
MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41991666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36976396276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-60/16d14b95-fe8b-4e1f-94e1-65d966ba24d6.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-60/16d14b95-fe8b-4e1f-94e1-65d966ba24d6.json deleted file mode 100644 index 2240aec4cc29d38b82cca12b7b0e34e9cc2308d7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-60/16d14b95-fe8b-4e1f-94e1-65d966ba24d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-60/1762652580.2605288", - "retrieved_timestamp": "1762652580.2605288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-60", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-60" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42070484093316846 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5368509826419269 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1578549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42890625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3689328457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-61/00b1b367-c4eb-4048-b80d-a8253e7c2048.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-61/00b1b367-c4eb-4048-b80d-a8253e7c2048.json deleted file mode 100644 index b7d485d9cadad1696b3e47bee7b66c1eabcefc2c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-61/00b1b367-c4eb-4048-b80d-a8253e7c2048.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-61/1762652580.260743", - "retrieved_timestamp": "1762652580.260743", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-61", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-61" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42467652495378927 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5271029876122725 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4355729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3679355053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-62/85bd08bf-bdc3-42fb-b8f9-3d83e32921bc.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-62/85bd08bf-bdc3-42fb-b8f9-3d83e32921bc.json deleted file mode 100644 index fa9c30990860556b24cab0d98db985db08b82ca3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-62/85bd08bf-bdc3-42fb-b8f9-3d83e32921bc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-62/1762652580.260948", - "retrieved_timestamp": "1762652580.260949", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-62", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-62" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41498446344587914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5379352222621877 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1623867069486405 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42890625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3719248670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-63/c9df2e30-5e2d-42cc-8597-dc354602350a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-63/c9df2e30-5e2d-42cc-8597-dc354602350a.json deleted file mode 100644 index 59deb5c66a70b677e7ac23593759d8d824dbb765..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-63/c9df2e30-5e2d-42cc-8597-dc354602350a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-63/1762652580.261157", - "retrieved_timestamp": "1762652580.261157", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-63", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-63" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43077146415954115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49171126396743653 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4312604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3248005319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-64/90830134-43d5-4d0c-9a93-4be2c1c7dba8.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-64/90830134-43d5-4d0c-9a93-4be2c1c7dba8.json deleted file mode 100644 index 6705ea7f644def317565b497caf9aba3a162cffa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-64/90830134-43d5-4d0c-9a93-4be2c1c7dba8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-64/1762652580.261374", - "retrieved_timestamp": "1762652580.261375", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-64", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-64" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41401038134372353 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359944334653838 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4355416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3692652925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-7/b63d1462-f84b-4d20-86d6-1a54cf4eb81f.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-7/b63d1462-f84b-4d20-86d6-1a54cf4eb81f.json deleted file mode 100644 index 5721cb645779158317d20848e58f1b231e896f75..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-7/b63d1462-f84b-4d20-86d6-1a54cf4eb81f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-7/1762652580.261788", - "retrieved_timestamp": "1762652580.261791", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-7", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4623953332712758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5860594415302606 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41191666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3715093085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-8/f6dced28-f64c-4995-88b1-ac9a82903de2.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-8/f6dced28-f64c-4995-88b1-ac9a82903de2.json deleted file mode 100644 index 47959dd0898e0aeb67b161f4731ea6024cf0b1e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-8/f6dced28-f64c-4995-88b1-ac9a82903de2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-8/1762652580.262149", - "retrieved_timestamp": "1762652580.262152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-8", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45967255770245175 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5899505025903907 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4265208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37200797872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-9/956d92e9-51fb-4770-8687-6003f9594345.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-9/956d92e9-51fb-4770-8687-6003f9594345.json deleted file mode 100644 index 80a3dc66154df762edceb92294cae766178774e3..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-9/956d92e9-51fb-4770-8687-6003f9594345.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_bh-9/1762652580.262652", - "retrieved_timestamp": "1762652580.2626529", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/bh-9", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/bh-9" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4508548429278758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5850048697918168 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4145833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3702626329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_dp-6-8b/5c61d4f5-25a0-4ffe-a9d2-2a33d8bbd717.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_dp-6-8b/5c61d4f5-25a0-4ffe-a9d2-2a33d8bbd717.json deleted file mode 100644 index 56b878294b850868a4d973f6731ca45ff1792521..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_dp-6-8b/5c61d4f5-25a0-4ffe-a9d2-2a33d8bbd717.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_dp-6-8b/1762652580.263117", - "retrieved_timestamp": "1762652580.2631192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { 
- "name": "jaspionjader/dp-6-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/dp-6-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4805804155197099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5299697041031141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44338541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38971077127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_dp-7-8b/44d85302-1af8-48ef-aebe-a9512c5bc387.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_dp-7-8b/44d85302-1af8-48ef-aebe-a9512c5bc387.json deleted file mode 100644 index f96126fb64d82faf067979c62217d49a7a64cf73..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_dp-7-8b/44d85302-1af8-48ef-aebe-a9512c5bc387.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_dp-7-8b/1762652580.2634509", - "retrieved_timestamp": "1762652580.2634518", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/dp-7-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/dp-7-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44983089314130953 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5290850650389306 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44075 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3933676861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_ek-6/a05ce252-928c-4482-95f7-f4c0fc2c7c10.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_ek-6/a05ce252-928c-4482-95f7-f4c0fc2c7c10.json deleted file mode 100644 index 8d60b81d5ef07a2c708d62c9b2b755c67c982bcd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_ek-6/a05ce252-928c-4482-95f7-f4c0fc2c7c10.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_ek-6/1762652580.2637498", - "retrieved_timestamp": "1762652580.263751", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/ek-6", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/ek-6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4642437621067656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5219292795769993 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4143645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3861369680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_ek-7/23127691-ff90-433f-97d2-322e1191d821.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_ek-7/23127691-ff90-433f-97d2-322e1191d821.json deleted file mode 100644 index d1547609fa4f8aa3c2d2a1ac1d61120b37914239..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_ek-7/23127691-ff90-433f-97d2-322e1191d821.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_ek-7/1762652580.264135", - "retrieved_timestamp": "1762652580.2641358", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/ek-7", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/ek-7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47670846686791046 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5194098090521417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41706249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38871343085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-1-8b/91d65b2a-a96a-467b-9e5c-9efa28d7fd96.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-1-8b/91d65b2a-a96a-467b-9e5c-9efa28d7fd96.json deleted file mode 100644 index 13897e64eb7b41edc84e96752172f5a45e0b30d0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-1-8b/91d65b2a-a96a-467b-9e5c-9efa28d7fd96.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-1-8b/1762652580.264415", - "retrieved_timestamp": "1762652580.264416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/f-1-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-1-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49826571275327247 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5140825686172996 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45268749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39070811170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-2-8b/c63fc798-cf74-4767-ba95-6353b6761bcc.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-2-8b/c63fc798-cf74-4767-ba95-6353b6761bcc.json deleted file mode 100644 index feb2695973a969a2b198032b0c915a81110f19a2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-2-8b/c63fc798-cf74-4767-ba95-6353b6761bcc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-2-8b/1762652580.264705", - "retrieved_timestamp": "1762652580.2647061", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/f-2-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-2-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48237897667078905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294150378468933 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4500520833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39619348404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-3-8b/5ba1e4d3-29d4-4337-bd10-9e1a5df29af4.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-3-8b/5ba1e4d3-29d4-4337-bd10-9e1a5df29af4.json deleted file mode 100644 index 180b125318c0cbae6cbc4ba5fbb31579d37b3575..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-3-8b/5ba1e4d3-29d4-4337-bd10-9e1a5df29af4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-3-8b/1762652580.264997", - "retrieved_timestamp": "1762652580.264998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/f-3-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-3-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4803055891700687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5274906581043712 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44208333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39544547872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-4-8b/a98ec95c-4af0-4b55-adbc-06e5ceecd00f.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-4-8b/a98ec95c-4af0-4b55-adbc-06e5ceecd00f.json deleted file mode 100644 index 09b854811be943233548252242eec4df10a61f09..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-4-8b/a98ec95c-4af0-4b55-adbc-06e5ceecd00f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-4-8b/1762652580.265391", - "retrieved_timestamp": "1762652580.2653928", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/f-4-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-4-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4797060687863757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5288622486396436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45141666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39561170212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-5-8b/4dd614dc-b68b-456c-ac55-f2221a479caa.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-5-8b/4dd614dc-b68b-456c-ac55-f2221a479caa.json deleted file mode 100644 index d4d7ddc10bac7880124fb337ceafe8d3856bcdd8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-5-8b/4dd614dc-b68b-456c-ac55-f2221a479caa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-5-8b/1762652580.265783", - "retrieved_timestamp": "1762652580.265785", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/f-5-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-5-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5043606519590242 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5313273519630752 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4460520833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39486369680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-6-8b/2a71c7d7-8ae6-45e7-ab7f-54f7d31dd131.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-6-8b/2a71c7d7-8ae6-45e7-ab7f-54f7d31dd131.json deleted file mode 100644 index ca5975e419df86fa73a32e6a1cad35a87d3353a6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-6-8b/2a71c7d7-8ae6-45e7-ab7f-54f7d31dd131.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-6-8b/1762652580.2661529", - "retrieved_timestamp": "1762652580.266155", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/f-6-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-6-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48460196722474147 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.524094753042471 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44735416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3939494680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-7-8b/e8c5d934-c9b6-460c-bd45-c4a3e2d26bed.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-7-8b/e8c5d934-c9b6-460c-bd45-c4a3e2d26bed.json deleted file mode 100644 index 
6e4770307095a1befcc71b110dbbe1d63c2ff6ed..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-7-8b/e8c5d934-c9b6-460c-bd45-c4a3e2d26bed.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-7-8b/1762652580.2664478", - "retrieved_timestamp": "1762652580.266449", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/f-7-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-7-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4462337708391512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5277022085059414 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4315104166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39361702127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-8-8b/dad898e1-ee18-4864-b432-462d17ac8006.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-8-8b/dad898e1-ee18-4864-b432-462d17ac8006.json deleted file mode 100644 index d49e6fbb5d28465258eff4743186166ae3454d0f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-8-8b/dad898e1-ee18-4864-b432-462d17ac8006.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-8-8b/1762652580.266931", - "retrieved_timestamp": "1762652580.266932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/f-8-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-8-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4739358236146758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5259311478463803 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43544791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39403257978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-9-8b/1373c279-13b7-46d3-94a4-7b47c9319f88.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-9-8b/1373c279-13b7-46d3-94a4-7b47c9319f88.json deleted file mode 100644 index ec7913e9ed9065244fd6ef2099af95b56e585257..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-9-8b/1373c279-13b7-46d3-94a4-7b47c9319f88.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_f-9-8b/1762652580.267217", - "retrieved_timestamp": "1762652580.2672179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/f-9-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/f-9-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4601723427173233 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5291558412946383 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44608333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3943650265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fct-14-8b/22c3022f-d538-4a4d-8d4b-05e915506451.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fct-14-8b/22c3022f-d538-4a4d-8d4b-05e915506451.json deleted file mode 100644 index c265b7c6ad960847bcbbf66b65a00a0d55221a18..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fct-14-8b/22c3022f-d538-4a4d-8d4b-05e915506451.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_fct-14-8b/1762652580.2674618", - "retrieved_timestamp": "1762652580.267463", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/fct-14-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/fct-14-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4128612082607481 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206018889288543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875498670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fct-9-8b/4d1ddf64-4626-4877-a0fa-84e06f6cf977.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fct-9-8b/4d1ddf64-4626-4877-a0fa-84e06f6cf977.json deleted file mode 100644 index 93f909483c6b2b71edb496877c469fd491ceb6d2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fct-9-8b/4d1ddf64-4626-4877-a0fa-84e06f6cf977.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_fct-9-8b/1762652580.267691", - "retrieved_timestamp": "1762652580.267692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/fct-9-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/fct-9-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4353925362482657 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520510244410076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42906249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39320146276595747 - } - } - ], - "additional_details": { - "precision": 
"bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-1-8b/2014c198-5e12-41ef-8f65-7321d0423573.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-1-8b/2014c198-5e12-41ef-8f65-7321d0423573.json deleted file mode 100644 index 8677355713559d9050ae1bc38cfadde62be7ff79..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-1-8b/2014c198-5e12-41ef-8f65-7321d0423573.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_fr-1-8b/1762652580.267912", - "retrieved_timestamp": "1762652580.2679129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/fr-1-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/fr-1-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.421079402651631 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5142290494968609 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4276979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36103723404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-10-8b/725e5a72-548f-46d0-b268-12209e5cb085.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-10-8b/725e5a72-548f-46d0-b268-12209e5cb085.json deleted file mode 100644 index 4b0629e107a5150f634b9d37c321487acc3a9be9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-10-8b/725e5a72-548f-46d0-b268-12209e5cb085.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/jaspionjader_fr-10-8b/1762652580.268136", - "retrieved_timestamp": "1762652580.268136", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/fr-10-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/fr-10-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44018869931781013 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206624978702634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4118541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3863031914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-3-8b/8bdd1aba-81e4-44d1-acfd-6efeaf391ac8.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-3-8b/8bdd1aba-81e4-44d1-acfd-6efeaf391ac8.json deleted file mode 100644 index 36cd34ef2f1d283da58bc5d45a3cbf1e1d935251..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-3-8b/8bdd1aba-81e4-44d1-acfd-6efeaf391ac8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_fr-3-8b/1762652580.268359", - "retrieved_timestamp": "1762652580.26836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/fr-3-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/fr-3-8b" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325700253106203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5255174690526301 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41982291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3863031914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-8B/6e5584a8-5b8e-48ce-8b80-2d39a74a9b0d.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-8B/6e5584a8-5b8e-48ce-8b80-2d39a74a9b0d.json deleted file mode 100644 index b16c7d7baf8715ebaaa0d3f62cc82f05b371df25..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-8B/6e5584a8-5b8e-48ce-8b80-2d39a74a9b0d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_gamma-Kosmos-EVAA-8B/1762652580.268576", - "retrieved_timestamp": "1762652580.268577", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/gamma-Kosmos-EVAA-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/gamma-Kosmos-EVAA-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42500121898784116 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5252624326543692 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44115624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37757646276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-v2-8B/67f972e1-4ebd-4b78-b740-fdc03ac88aac.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-v2-8B/67f972e1-4ebd-4b78-b740-fdc03ac88aac.json deleted file mode 100644 index c9fbde590d4aed1b836ce41d85dc30667605982c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-v2-8B/67f972e1-4ebd-4b78-b740-fdc03ac88aac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_gamma-Kosmos-EVAA-v2-8B/1762652580.268805", - "retrieved_timestamp": "1762652580.268806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/gamma-Kosmos-EVAA-v2-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/gamma-Kosmos-EVAA-v2-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4232525255211727 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5262464083930688 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": 
{ - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3755817819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-v3-8B/d461545f-ebcb-49e2-94ce-a6591e31a94a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-v3-8B/d461545f-ebcb-49e2-94ce-a6591e31a94a.json deleted file mode 100644 index 8162acbc7076b9331baee2f8abcccb6524083309..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-v3-8B/d461545f-ebcb-49e2-94ce-a6591e31a94a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_gamma-Kosmos-EVAA-v3-8B/1762652580.269119", - "retrieved_timestamp": "1762652580.26912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/gamma-Kosmos-EVAA-v3-8B", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/gamma-Kosmos-EVAA-v3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43326928106313467 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.527793553969925 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4263020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3897938829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knf-2-8b/267e641c-7fbd-40d3-a9b7-eb3621240b2a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knf-2-8b/267e641c-7fbd-40d3-a9b7-eb3621240b2a.json deleted file mode 100644 index d8f91ae50a45fefbc6e4418b59263772efd6584e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knf-2-8b/267e641c-7fbd-40d3-a9b7-eb3621240b2a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_knf-2-8b/1762652580.269415", - "retrieved_timestamp": "1762652580.2694159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/knf-2-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/knf-2-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42500121898784116 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206718655559387 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3874667553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knfp-2-8b/0bd6a333-afc0-43a4-9d14-fa44c2364184.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knfp-2-8b/0bd6a333-afc0-43a4-9d14-fa44c2364184.json deleted file mode 100644 index 618cb4a561b7dc1d9b2c7ea9e856767a932790e0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knfp-2-8b/0bd6a333-afc0-43a4-9d14-fa44c2364184.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/jaspionjader_knfp-2-8b/1762652580.2696629", - "retrieved_timestamp": "1762652580.269664", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/knfp-2-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/knfp-2-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5327120928026525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5304878011708133 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14274924471299094 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37258976063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knfp-3-8b/38a5c599-a098-42f4-a7cb-acee487e382a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knfp-3-8b/38a5c599-a098-42f4-a7cb-acee487e382a.json deleted file mode 100644 index 2532402609f89e74e077ca1d380b2dc2edd2b078..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knfp-3-8b/38a5c599-a098-42f4-a7cb-acee487e382a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_knfp-3-8b/1762652580.2700531", - "retrieved_timestamp": "1762652580.2700539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/knfp-3-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/knfp-3-8b" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49456885508229276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5199790073136731 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41712499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3881316489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-1-8b/cd7e14cb-b1f1-47d8-81a9-960da8ac4e05.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-1-8b/cd7e14cb-b1f1-47d8-81a9-960da8ac4e05.json deleted file mode 100644 index 96bb72cd2f85c2a9c3917ff42f9ba1d0f16a86fe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-1-8b/cd7e14cb-b1f1-47d8-81a9-960da8ac4e05.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-1-8b/1762652580.2702851", - "retrieved_timestamp": "1762652580.270286", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/kstc-1-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/kstc-1-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4642936297911763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5209048705325947 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4157916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3892121010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-11-8b/41b46842-dffa-4791-8225-99d676f563c9.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-11-8b/41b46842-dffa-4791-8225-99d676f563c9.json deleted file mode 100644 index 93334c07fbc5f8459a672995f6208f75db13f7ee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-11-8b/41b46842-dffa-4791-8225-99d676f563c9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-11-8b/1762652580.270522", - "retrieved_timestamp": "1762652580.270522", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/kstc-11-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/kstc-11-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4757343847657549 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5189389675805397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4117604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3878823138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-4-8b/6b63598f-4891-4b71-99ca-bc56b780d829.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-4-8b/6b63598f-4891-4b71-99ca-bc56b780d829.json deleted file mode 100644 index 58bb003d416d3beb493bbf81e983215ac02a6218..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-4-8b/6b63598f-4891-4b71-99ca-bc56b780d829.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-4-8b/1762652580.270735", - "retrieved_timestamp": "1762652580.270736", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/kstc-4-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/kstc-4-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4769832932175517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5216059333020012 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4117916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868849734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-5-8b/ea79ca75-c55b-457a-b952-528a22567dbb.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-5-8b/ea79ca75-c55b-457a-b952-528a22567dbb.json deleted file mode 100644 index d36f019366b8d344714e938f60f9f3503b4590f6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-5-8b/ea79ca75-c55b-457a-b952-528a22567dbb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-5-8b/1762652580.270952", - "retrieved_timestamp": "1762652580.270953", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/kstc-5-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/kstc-5-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47208739477918593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5211438914491455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4223958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3892121010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-6-8b/f7d63a4b-070d-4581-acce-cd356a3dea47.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-6-8b/f7d63a4b-070d-4581-acce-cd356a3dea47.json deleted file mode 100644 index 19241db76f459c7b50e0daa1959900cf0a031b1c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-6-8b/f7d63a4b-070d-4581-acce-cd356a3dea47.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-6-8b/1762652580.2711701", - "retrieved_timestamp": "1762652580.2711701", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/kstc-6-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/kstc-6-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49439376410147295 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5230977287748603 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4104895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3857214095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-8-8b/85502cb7-db11-43ce-a3cf-f9329ecec2e1.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-8-8b/85502cb7-db11-43ce-a3cf-f9329ecec2e1.json deleted file mode 100644 index 76709f3fccff873a91c47fe091fd369ca9775443..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-8-8b/85502cb7-db11-43ce-a3cf-f9329ecec2e1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-8-8b/1762652580.271383", - "retrieved_timestamp": "1762652580.271384", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/kstc-8-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/kstc-8-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49097173278013445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5238910223750602 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42112499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3888796542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-9-8b/5f36e182-fa70-41d9-9cc6-12367035fc76.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-9-8b/5f36e182-fa70-41d9-9cc6-12367035fc76.json deleted file mode 100644 index 9e3959758bffb49da2a9f6d10bc3990820648b6d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-9-8b/5f36e182-fa70-41d9-9cc6-12367035fc76.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-9-8b/1762652580.27159", - "retrieved_timestamp": "1762652580.27159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/kstc-9-8b", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/kstc-9-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4860758343417687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5238366551736342 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.13595166163141995 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4117916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38721742021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-10/79c255e5-8a6b-4afd-a03e-e35cbcbcc712.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-10/79c255e5-8a6b-4afd-a03e-e35cbcbcc712.json deleted file mode 100644 index 4aca0bb19f1d1e4736dfbc6306ed6ad063ca0038..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-10/79c255e5-8a6b-4afd-a03e-e35cbcbcc712.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-10/1762652580.271806", - "retrieved_timestamp": "1762652580.271807", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-10", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-10" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4359920566319587 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5096469529197213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3920104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3663563829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-11/0091eabc-3888-4e1a-a29d-8c4e98b599f2.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-11/0091eabc-3888-4e1a-a29d-8c4e98b599f2.json deleted file mode 100644 index 683aa6b30b56b47f8d1a18a5f1d986f925168759..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-11/0091eabc-3888-4e1a-a29d-8c4e98b599f2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-11/1762652580.272018", - "retrieved_timestamp": "1762652580.272018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-11", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-11" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.372519359743259 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4890236865115587 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3919479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33818151595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-13/1a1eaa84-9926-4c4b-b254-96cd667c25ac.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-13/1a1eaa84-9926-4c4b-b254-96cd667c25ac.json deleted file mode 100644 index 
895ad086ad49781aa843aec3ddc3b0f73a6de7e6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-13/1a1eaa84-9926-4c4b-b254-96cd667c25ac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-13/1762652580.272234", - "retrieved_timestamp": "1762652580.272237", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-13", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-13" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4378404854674486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097334543819346 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38140625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35804521276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-14/59703023-61e1-4df0-8542-703d5a318756.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-14/59703023-61e1-4df0-8542-703d5a318756.json deleted file mode 100644 index 335e9416b6f7a6a3bef8f82f4baddde2f6284719..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-14/59703023-61e1-4df0-8542-703d5a318756.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-14/1762652580.27245", - "retrieved_timestamp": "1762652580.2724512", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-14", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-14" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4106880853912065 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088505978489455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3960416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3626994680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-17/fea528ae-4015-4adf-bce0-f9775554cc5f.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-17/fea528ae-4015-4adf-bce0-f9775554cc5f.json deleted file mode 100644 index f88fc956c0b1e69ec66d32f51c21a0d4364a1ca2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-17/fea528ae-4015-4adf-bce0-f9775554cc5f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-17/1762652580.272654", - "retrieved_timestamp": "1762652580.272655", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-17", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-17" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42167892303532406 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5070562055653275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3761041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3618683510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-2/1950fba0-3a1b-4cbe-8fa5-9947ed8e4bad.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-2/1950fba0-3a1b-4cbe-8fa5-9947ed8e4bad.json deleted file mode 100644 index 6463ec1e0a22ea7fef8cefcf6e979288b2f3b08f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-2/1950fba0-3a1b-4cbe-8fa5-9947ed8e4bad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-2/1762652580.2728698", - "retrieved_timestamp": "1762652580.272871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-2", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40159554426698935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5008068127974601 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { 
- "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3958854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35064827127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-20/1430e550-80ca-4f84-952f-b5b10fbca711.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-20/1430e550-80ca-4f84-952f-b5b10fbca711.json deleted file mode 100644 index 68fd7fdbd9a8f4c9b21d3b46adf1b1980b99155e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-20/1430e550-80ca-4f84-952f-b5b10fbca711.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-20/1762652580.273083", - "retrieved_timestamp": "1762652580.273084", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-20", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-20" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4393143525844759 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5061273966566772 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229606 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39334375000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36652260638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } 
-} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-22/c0898ca4-21a7-4d83-ad2e-1aa61bd370fa.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-22/c0898ca4-21a7-4d83-ad2e-1aa61bd370fa.json deleted file mode 100644 index d6bd7cf71994b703c7d66534e0179f352771935e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-22/c0898ca4-21a7-4d83-ad2e-1aa61bd370fa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-22/1762652580.2733881", - "retrieved_timestamp": "1762652580.273391", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-22", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-22" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4321201079801593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5081790871805086 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38934375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3650265957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-23/f4b76351-e472-47a9-8011-6bf2e7e33a71.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-23/f4b76351-e472-47a9-8011-6bf2e7e33a71.json deleted file mode 100644 index 4b7433c0d7c7b38ccd304e8558fb6e2f9d56d333..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-23/f4b76351-e472-47a9-8011-6bf2e7e33a71.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-23/1762652580.27371", - "retrieved_timestamp": 
"1762652580.2737112", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-23", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-23" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44780737332499987 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5131603005034272 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40924999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3725066489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-25/03c03447-1bf3-4721-8f9e-5ef041ab5d7d.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-25/03c03447-1bf3-4721-8f9e-5ef041ab5d7d.json deleted file mode 100644 index 98d0eb7f2a44d4bc929b3d276ba00088bf0a4779..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-25/03c03447-1bf3-4721-8f9e-5ef041ab5d7d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-25/1762652580.27394", - "retrieved_timestamp": "1762652580.273941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-25", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-25" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4500303638789523 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5094887898349904 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3946145833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684341755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-29/fe231e36-6cc2-412c-b86e-0ba6ba9cc430.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-29/fe231e36-6cc2-412c-b86e-0ba6ba9cc430.json deleted file mode 100644 index 6f9e95465f433bab672109f8263ce2dc160f4c8c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-29/fe231e36-6cc2-412c-b86e-0ba6ba9cc430.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-29/1762652580.274164", - "retrieved_timestamp": "1762652580.274165", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-29", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-29" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4430610779398662 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5096472519745161 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.08685800604229606 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3933125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366938164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-32/1095577f-7b50-4854-9c7c-5beb59206e60.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-32/1095577f-7b50-4854-9c7c-5beb59206e60.json deleted file mode 100644 index 553acecb1361517c33de6f600b562f168559f994..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-32/1095577f-7b50-4854-9c7c-5beb59206e60.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-32/1762652580.274382", - "retrieved_timestamp": "1762652580.274383", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-32", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-32" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45155409868039026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5167277162337642 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4039166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3765791223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-33/2597a3df-0f30-43d1-b1b3-7a0baac07675.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-33/2597a3df-0f30-43d1-b1b3-7a0baac07675.json deleted file mode 100644 index b6c2d20a91b758fc772c56e295440ae650f670f8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-33/2597a3df-0f30-43d1-b1b3-7a0baac07675.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-33/1762652580.274691", - "retrieved_timestamp": "1762652580.274692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-33", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-33" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4457339858242796 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5081308429202344 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38667708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3679355053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-34/050afa51-be7c-4cad-ae8b-bd63384df297.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-34/050afa51-be7c-4cad-ae8b-bd63384df297.json deleted file mode 100644 index 
ae2ba09f1b1d8c8d6d5ec3996c2b6d82f8cc0dbb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-34/050afa51-be7c-4cad-ae8b-bd63384df297.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-34/1762652580.2749598", - "retrieved_timestamp": "1762652580.274961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-34", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-34" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4350678422142138 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5077400809148992 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3880416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37200797872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-35/0d7698b6-de52-4781-831f-a3ca8b23dd72.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-35/0d7698b6-de52-4781-831f-a3ca8b23dd72.json deleted file mode 100644 index 190ac240d306d6c61aa904b7225be7c142c7681b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-35/0d7698b6-de52-4781-831f-a3ca8b23dd72.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-35/1762652580.275198", - "retrieved_timestamp": "1762652580.2752", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-35", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-35" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42417673993891764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5103079759559944 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39464583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3676030585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-36/cf85253f-0ecd-4943-a508-eab1e562a497.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-36/cf85253f-0ecd-4943-a508-eab1e562a497.json deleted file mode 100644 index 59d4055701d32fb0c192da70918a8faacac95f11..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-36/cf85253f-0ecd-4943-a508-eab1e562a497.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-36/1762652580.275441", - "retrieved_timestamp": "1762652580.275442", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-36", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-36" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4518289250300314 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5087352369131289 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3933125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37109375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-37/e64e5fe0-c726-4b9d-9d7b-952e7c7508ab.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-37/e64e5fe0-c726-4b9d-9d7b-952e7c7508ab.json deleted file mode 100644 index 32add8ede757cd4fd8997806171c32f731f22792..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-37/e64e5fe0-c726-4b9d-9d7b-952e7c7508ab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-37/1762652580.2757561", - "retrieved_timestamp": "1762652580.275757", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-37", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-37" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4533526598314694 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5099854293096197 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39464583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3695146276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-6/0e1cd676-f95b-4562-8c5d-e932f148dc23.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-6/0e1cd676-f95b-4562-8c5d-e932f148dc23.json deleted file mode 100644 index 614d3bd2a8471aaa9d89aec2004c6e6704b7c458..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-6/0e1cd676-f95b-4562-8c5d-e932f148dc23.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-6/1762652580.276035", - "retrieved_timestamp": "1762652580.276036", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-6", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41166216749336204 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5098719666858446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4066458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3611203457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-mix-1/3a8a175f-5173-491b-9acf-87fe781f16df.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-mix-1/3a8a175f-5173-491b-9acf-87fe781f16df.json deleted file mode 100644 index fa1afeab8acb95a8e8b982e0e35390a485b4b3ae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-mix-1/3a8a175f-5173-491b-9acf-87fe781f16df.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_slu-mix-1/1762652580.276264", - "retrieved_timestamp": "1762652580.276264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/slu-mix-1", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/slu-mix-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45689991444921696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5240269525191525 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42766666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39303523936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_sof-1/b1f4196a-0050-4107-a97b-4e1bd6ece17b.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_sof-1/b1f4196a-0050-4107-a97b-4e1bd6ece17b.json deleted file mode 100644 index c41d8361987e725928d5f10081f6a75b97426372..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_sof-1/b1f4196a-0050-4107-a97b-4e1bd6ece17b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/jaspionjader_sof-1/1762652580.276484", - "retrieved_timestamp": "1762652580.2764852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/sof-1", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/sof-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4313709845432342 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5009822733212669 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40819791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.367436835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_sof-10/03761253-711d-428d-a3bd-89974a50b490.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_sof-10/03761253-711d-428d-a3bd-89974a50b490.json deleted file mode 100644 index 177ef6f39a148521bcaaa59ebc3aab37bdbf561c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_sof-10/03761253-711d-428d-a3bd-89974a50b490.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_sof-10/1762652580.276895", - "retrieved_timestamp": "1762652580.276897", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/sof-10", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/sof-10" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46484328249045864 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5197177291754291 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40906250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38738364361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_sof-3/e5cd6a8b-88ed-4a0d-8584-889a4fde72a7.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_sof-3/e5cd6a8b-88ed-4a0d-8584-889a4fde72a7.json deleted file mode 100644 index 5c417908954e929bd252789cc60a97ed40204a29..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_sof-3/e5cd6a8b-88ed-4a0d-8584-889a4fde72a7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_sof-3/1762652580.277219", - "retrieved_timestamp": "1762652580.27722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/sof-3", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/sof-3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46369410940748323 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206072122413828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41312499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3812333776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_sof-6/0755b7f9-bdd7-4e2a-92da-6650934db265.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_sof-6/0755b7f9-bdd7-4e2a-92da-6650934db265.json deleted file mode 100644 index bf6ee89058bd62f0adf13575b0ec3fd4381d7f29..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_sof-6/0755b7f9-bdd7-4e2a-92da-6650934db265.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_sof-6/1762652580.277473", - "retrieved_timestamp": "1762652580.2774742", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/sof-6", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/sof-6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4353925362482657 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5209098090521417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41706250000000006 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3843916223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-10/2bcc7f9a-9c36-487e-8522-bfbe1910b857.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-10/2bcc7f9a-9c36-487e-8522-bfbe1910b857.json deleted file mode 100644 index c6b003257d815ce8fc39f7fb797a8e6c2ef7a7e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-10/2bcc7f9a-9c36-487e-8522-bfbe1910b857.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-10/1762652580.2777631", - "retrieved_timestamp": "1762652580.277764", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/test-10", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-10" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4578241288669619 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316217442466934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42509375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39361702127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-11/98f97092-7c95-46dd-94c7-4030f153d197.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-11/98f97092-7c95-46dd-94c7-4030f153d197.json deleted 
file mode 100644 index a454b35a67e32669f831c9ba2e4feeedd7895940..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-11/98f97092-7c95-46dd-94c7-4030f153d197.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-11/1762652580.2779882", - "retrieved_timestamp": "1762652580.2779891", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/test-11", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-11" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45412727119598223 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5350048053167004 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.429 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3939494680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-12/e49c9cc8-96ff-4a3c-b7b4-ea5562f41449.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-12/e49c9cc8-96ff-4a3c-b7b4-ea5562f41449.json deleted file mode 100644 index 21a126d67ca105845432f1e403e2c210c53c93eb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-12/e49c9cc8-96ff-4a3c-b7b4-ea5562f41449.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-12/1762652580.278201", - "retrieved_timestamp": "1762652580.278202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/test-12", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-12" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4368165356808823 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5347063686599355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42503124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3935339095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-13/98772920-a700-4fda-88fd-53c16ac4b1a1.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-13/98772920-a700-4fda-88fd-53c16ac4b1a1.json deleted file mode 100644 index 1ee8baa89fdbb8bded980690e344ef07c185e23e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-13/98772920-a700-4fda-88fd-53c16ac4b1a1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-13/1762652580.278408", - "retrieved_timestamp": "1762652580.278409", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/test-13", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-13" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45809895521660304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.531808681066841 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4263958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3935339095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-14/d647b482-3d3b-4ed4-b8b5-d57eedf87db9.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-14/d647b482-3d3b-4ed4-b8b5-d57eedf87db9.json deleted file mode 100644 index 49772b91f6bb685d2a8fb2b12acb1d35722baaac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-14/d647b482-3d3b-4ed4-b8b5-d57eedf87db9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-14/1762652580.2787268", - "retrieved_timestamp": "1762652580.278728", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/test-14", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-14" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4443853420036614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5322932549151301 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4316979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3929521276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-15/f197c7ce-c30a-49ad-bd6c-9571d3b25637.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-15/f197c7ce-c30a-49ad-bd6c-9571d3b25637.json deleted file mode 100644 index fa93524e07aa67d8f0dbde1310ea9c2ddee35a6b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-15/f197c7ce-c30a-49ad-bd6c-9571d3b25637.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-15/1762652580.278964", - "retrieved_timestamp": "1762652580.278965", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/test-15", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-15" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4364918416468304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.53278841091336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4264270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3929521276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-16/80c756a7-9d47-4b49-bf42-bbada0909163.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-16/80c756a7-9d47-4b49-bf42-bbada0909163.json deleted file mode 100644 index 161f9729bc2134aa8a2466c47edd257d01e1fde4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-16/80c756a7-9d47-4b49-bf42-bbada0909163.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-16/1762652580.279189", - "retrieved_timestamp": "1762652580.27919", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/test-16", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-16" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4599473840520929 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5330160713144172 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1095166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4224583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39303523936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-17/c9933c3d-98ab-4486-bd42-7c90f5ed3bd2.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-17/c9933c3d-98ab-4486-bd42-7c90f5ed3bd2.json deleted file mode 100644 index 31574db5ea40a143251c6b29719b0c4027345c21..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-17/c9933c3d-98ab-4486-bd42-7c90f5ed3bd2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/jaspionjader_test-17/1762652580.279401", - "retrieved_timestamp": "1762652580.279402", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/test-17", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-17" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42674991245450955 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5329373895863633 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11027190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.429 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39286901595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-18/3f3eeca1-d401-436e-b7e6-5fa82c099270.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-18/3f3eeca1-d401-436e-b7e6-5fa82c099270.json deleted file mode 100644 index fe928b74a790f3e4f6b7fb80e75ab6b6ab1cce0c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-18/3f3eeca1-d401-436e-b7e6-5fa82c099270.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-18/1762652580.2796118", - "retrieved_timestamp": "1762652580.279613", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/test-18", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-18" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43916474953124374 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5317453097096507 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42506249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39303523936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-19/ab7e0f6c-bca9-4f83-a4a0-5014c46e0512.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-19/ab7e0f6c-bca9-4f83-a4a0-5014c46e0512.json deleted file mode 100644 index a067dba61526bdc7868550695afd9c941d7ec3d1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-19/ab7e0f6c-bca9-4f83-a4a0-5014c46e0512.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-19/1762652580.279826", - "retrieved_timestamp": "1762652580.2798269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/test-19", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-19" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44008896394898867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5319373895863634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1095166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4263958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39286901595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-20/6391f921-4de7-4e83-8bb2-8d0ef0b58d8f.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-20/6391f921-4de7-4e83-8bb2-8d0ef0b58d8f.json deleted file mode 100644 index 475ce5b67e3f77abafe568ae5f489e1925cf02fc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_test-20/6391f921-4de7-4e83-8bb2-8d0ef0b58d8f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jaspionjader_test-20/1762652580.2800388", - "retrieved_timestamp": "1762652580.28004", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jaspionjader/test-20", - "developer": "jaspionjader", - "inference_platform": "unknown", - "id": "jaspionjader/test-20" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45292823042859615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5327388877137041 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.42506249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39195478723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jebcarter/jebcarter_psyonic-cetacean-20B/f8461982-37ad-4975-8445-996bdc9e59ce.json b/leaderboard_data/HFOpenLLMv2/jebcarter/jebcarter_psyonic-cetacean-20B/f8461982-37ad-4975-8445-996bdc9e59ce.json deleted file mode 100644 index b37b7a553bb39523eb981d9042d31e17a363d654..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jebcarter/jebcarter_psyonic-cetacean-20B/f8461982-37ad-4975-8445-996bdc9e59ce.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jebcarter_psyonic-cetacean-20B/1762652580.2807941", - "retrieved_timestamp": "1762652580.2807949", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jebcarter/psyonic-cetacean-20B", - "developer": "jebcarter", - "inference_platform": "unknown", - "id": "jebcarter/psyonic-cetacean-20B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25436619281284767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4907386156835858 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46611458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28856382978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 19.994 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_Llama-3-Nanda-10B-Chat/739c83a9-8ff7-48df-af0c-494891df487b.json 
b/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_Llama-3-Nanda-10B-Chat/739c83a9-8ff7-48df-af0c-494891df487b.json deleted file mode 100644 index b39af140760443e2726e9c67274201efc5eec98e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_Llama-3-Nanda-10B-Chat/739c83a9-8ff7-48df-af0c-494891df487b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jebish7_Llama-3-Nanda-10B-Chat/1762652580.28106", - "retrieved_timestamp": "1762652580.2810612", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jebish7/Llama-3-Nanda-10B-Chat", - "developer": "jebish7", - "inference_platform": "unknown", - "id": "jebish7/Llama-3-Nanda-10B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2952831819572069 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4958605204321644 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4356041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3156582446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 9.985 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_Llama-3.1-8B-Instruct/cc65b968-d766-4825-85cd-c36872eb1986.json b/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_Llama-3.1-8B-Instruct/cc65b968-d766-4825-85cd-c36872eb1986.json deleted file mode 100644 index b48007a27ca2ef9946a06df05aca4179db461216..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_Llama-3.1-8B-Instruct/cc65b968-d766-4825-85cd-c36872eb1986.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jebish7_Llama-3.1-8B-Instruct/1762652580.281322", - "retrieved_timestamp": "1762652580.281322", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jebish7/Llama-3.1-8B-Instruct", - "developer": "jebish7", - "inference_platform": "unknown", - "id": "jebish7/Llama-3.1-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5058345190760515 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088388495224864 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3997916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3777426861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_Nemotron-4-Mini-Hindi-4B-Base/70097d1f-8c48-49ab-b285-eebe2c85628e.json b/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_Nemotron-4-Mini-Hindi-4B-Base/70097d1f-8c48-49ab-b285-eebe2c85628e.json deleted file mode 100644 index 7ec5ede74fe59213ff566b20afc3ea798ab93b9d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_Nemotron-4-Mini-Hindi-4B-Base/70097d1f-8c48-49ab-b285-eebe2c85628e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jebish7_Nemotron-4-Mini-Hindi-4B-Base/1762652580.2815292", - "retrieved_timestamp": "1762652580.2815301", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jebish7/Nemotron-4-Mini-Hindi-4B-Base", - "developer": "jebish7", - "inference_platform": "unknown", - "id": "jebish7/Nemotron-4-Mini-Hindi-4B-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22848818911599 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3923566745600671 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42490625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25033244680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "NemotronForCausalLM", - "params_billions": 4.191 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_Nemotron-4-Mini-Hindi-4B-Instruct/e108df0b-a1ce-4c07-b683-6d3b33fd3988.json b/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_Nemotron-4-Mini-Hindi-4B-Instruct/e108df0b-a1ce-4c07-b683-6d3b33fd3988.json deleted file mode 100644 index 7ea59c568e08d5bc94a3dcf2a908c683d004d54e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_Nemotron-4-Mini-Hindi-4B-Instruct/e108df0b-a1ce-4c07-b683-6d3b33fd3988.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jebish7_Nemotron-4-Mini-Hindi-4B-Instruct/1762652580.2817988", - "retrieved_timestamp": "1762652580.2818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jebish7/Nemotron-4-Mini-Hindi-4B-Instruct", - "developer": "jebish7", - "inference_platform": "unknown", - "id": "jebish7/Nemotron-4-Mini-Hindi-4B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3345257250761313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040596055988545 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { 
- "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41529166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25947473404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "NemotronForCausalLM", - "params_billions": 4.191 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_Nemotron-Mini-4B-Instruct/77bd2442-4004-48cb-ba45-eeb1ffec2a39.json b/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_Nemotron-Mini-4B-Instruct/77bd2442-4004-48cb-ba45-eeb1ffec2a39.json deleted file mode 100644 index 05ef5913a52394c5a393f57b972b98c0cbdfa0f9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_Nemotron-Mini-4B-Instruct/77bd2442-4004-48cb-ba45-eeb1ffec2a39.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jebish7_Nemotron-Mini-4B-Instruct/1762652580.282024", - "retrieved_timestamp": "1762652580.282024", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jebish7/Nemotron-Mini-4B-Instruct", - "developer": "jebish7", - "inference_platform": "unknown", - "id": "jebish7/Nemotron-Mini-4B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37092026932982264 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4244475437312765 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47271875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27825797872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "NemotronForCausalLM", - "params_billions": 4.191 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_aya-expanse-8b/70f2cb5c-feb3-44ac-9346-7ff60137e1c7.json b/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_aya-expanse-8b/70f2cb5c-feb3-44ac-9346-7ff60137e1c7.json deleted file mode 100644 index 9065b9b799cfd5281c084c974e2f7c885f5d8d14..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jebish7/jebish7_aya-expanse-8b/70f2cb5c-feb3-44ac-9346-7ff60137e1c7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jebish7_aya-expanse-8b/1762652580.282242", - "retrieved_timestamp": "1762652580.282243", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jebish7/aya-expanse-8b", - "developer": "jebish7", - "inference_platform": "unknown", - "id": "jebish7/aya-expanse-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37911408396388246 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.496904421264497 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31025598404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "CohereForCausalLM", - "params_billions": 8.028 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/jeonsworld/jeonsworld_CarbonVillain-en-10.7B-v4/bd67084e-d9ca-43c4-ab6e-3fbe8a1fb782.json b/leaderboard_data/HFOpenLLMv2/jeonsworld/jeonsworld_CarbonVillain-en-10.7B-v4/bd67084e-d9ca-43c4-ab6e-3fbe8a1fb782.json deleted file mode 100644 index d8ef44b6467150b9c4e77a75838a6e11f33cf445..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jeonsworld/jeonsworld_CarbonVillain-en-10.7B-v4/bd67084e-d9ca-43c4-ab6e-3fbe8a1fb782.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jeonsworld_CarbonVillain-en-10.7B-v4/1762652580.2876348", - "retrieved_timestamp": "1762652580.287636", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jeonsworld/CarbonVillain-en-10.7B-v4", - "developer": "jeonsworld", - "inference_platform": "unknown", - "id": "jeonsworld/CarbonVillain-en-10.7B-v4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45792386423578324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.516795955873779 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3965416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31416223404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jieliu/jieliu_Storm-7B/f521cb33-487e-4636-9039-fe1af3e090f2.json b/leaderboard_data/HFOpenLLMv2/jieliu/jieliu_Storm-7B/f521cb33-487e-4636-9039-fe1af3e090f2.json deleted file mode 100644 index fe1551da4184f32e8bd1bd9bd9844394a074d24f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jieliu/jieliu_Storm-7B/f521cb33-487e-4636-9039-fe1af3e090f2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jieliu_Storm-7B/1762652580.288308", - 
"retrieved_timestamp": "1762652580.288309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jieliu/Storm-7B", - "developer": "jieliu", - "inference_platform": "unknown", - "id": "jieliu/Storm-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3424192254329623 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5187285371254579 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4428958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3119182180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jiviai/jiviai_medX_v2/386bc585-73ed-443e-b8ce-8723c533e41b.json b/leaderboard_data/HFOpenLLMv2/jiviai/jiviai_medX_v2/386bc585-73ed-443e-b8ce-8723c533e41b.json deleted file mode 100644 index f8565daa0f1777968bf6ec8ccfc5d82e8e6cd1d3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jiviai/jiviai_medX_v2/386bc585-73ed-443e-b8ce-8723c533e41b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jiviai_medX_v2/1762652580.288615", - "retrieved_timestamp": "1762652580.288616", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jiviai/medX_v2", - "developer": "jiviai", - "inference_platform": "unknown", - "id": "jiviai/medX_v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37431792089433813 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4508721125093523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34984375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34283577127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jlzhou/jlzhou_Qwen2.5-3B-Infinity-Instruct-0625/09585af5-dd80-4418-8f58-c6ae718a1eee.json b/leaderboard_data/HFOpenLLMv2/jlzhou/jlzhou_Qwen2.5-3B-Infinity-Instruct-0625/09585af5-dd80-4418-8f58-c6ae718a1eee.json deleted file mode 100644 index 32796fcf004dd1cc6bd16b953a6542af8bfdef5c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jlzhou/jlzhou_Qwen2.5-3B-Infinity-Instruct-0625/09585af5-dd80-4418-8f58-c6ae718a1eee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jlzhou_Qwen2.5-3B-Infinity-Instruct-0625/1762652580.288917", - "retrieved_timestamp": "1762652580.288918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jlzhou/Qwen2.5-3B-Infinity-Instruct-0625", - "developer": "jlzhou", - "inference_platform": "unknown", - "id": "jlzhou/Qwen2.5-3B-Infinity-Instruct-0625" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35575827692744144 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4773774601029352 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39809374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3198969414893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/f7207c82-5fc7-447a-b532-42bdb77ecfb4.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/f7207c82-5fc7-447a-b532-42bdb77ecfb4.json deleted file mode 100644 index f563fa644d6d008aa5bbf24def59306ff2d746d6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/f7207c82-5fc7-447a-b532-42bdb77ecfb4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01/1762652580.289233", - "retrieved_timestamp": "1762652580.289234", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42712447417297217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5035519809362171 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4637604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37391954787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/592dcd83-1adb-4193-add2-fb0ae66ea7ee.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/592dcd83-1adb-4193-add2-fb0ae66ea7ee.json deleted file mode 100644 index a0ff054ef7d83dd32e1732ddb83cbbf40364d3ea..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/592dcd83-1adb-4193-add2-fb0ae66ea7ee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1/1762652580.289527", - "retrieved_timestamp": "1762652580.2895281", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42532591302189304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5018845446835877 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41502083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37242353723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/2c82f973-c6cb-4aa2-9121-51bb0343aae4.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/2c82f973-c6cb-4aa2-9121-51bb0343aae4.json deleted file mode 100644 index 25216d2afde0f004689b2950b462ee659fa9faad..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/2c82f973-c6cb-4aa2-9121-51bb0343aae4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01/1762652580.2897432", - "retrieved_timestamp": "1762652580.289744", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33774828565982706 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4917135045463188 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5017708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3533078457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/42b63cfd-3b06-4363-bf78-40c40da10299.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/42b63cfd-3b06-4363-bf78-40c40da10299.json deleted file mode 100644 index fab251ce5d681c6c90183163f9c296ec84fbfe2e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/42b63cfd-3b06-4363-bf78-40c40da10299.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1/1762652580.289967", - "retrieved_timestamp": "1762652580.289967", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4273993005226133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5125777877188348 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42264583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37391954787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/cd4acb74-9433-435c-b0e9-9750fa52e3c0.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/cd4acb74-9433-435c-b0e9-9750fa52e3c0.json deleted file mode 100644 index 
5e2032caa0625f260d1708495fd76efc574c8fc5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/cd4acb74-9433-435c-b0e9-9750fa52e3c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01/1762652580.2902021", - "retrieved_timestamp": "1762652580.2902029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32036219453272874 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48835763921755193 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33444148936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/e9a9ec78-4ada-4ce4-ad92-c27332279f84.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/e9a9ec78-4ada-4ce4-ad92-c27332279f84.json deleted file mode 100644 index f38eb28a71a9c051e10aacb4f77d040947eac6c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/e9a9ec78-4ada-4ce4-ad92-c27332279f84.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1/1762652580.290431", - "retrieved_timestamp": "1762652580.290432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43963904661852776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5140041302485145 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43979166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36959773936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/b4e42076-bbff-4179-897d-b45a0e959020.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/b4e42076-bbff-4179-897d-b45a0e959020.json deleted file mode 100644 index 778b6ad2a75c1b45ac458fc83a9fcc20e770f9d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/b4e42076-bbff-4179-897d-b45a0e959020.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01/1762652580.290661", - "retrieved_timestamp": "1762652580.2906618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2814443454478561 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4854325756272537 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5163125000000001 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3295378989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/4017ff46-f389-4024-be9c-4360b0b6e64c.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/4017ff46-f389-4024-be9c-4360b0b6e64c.json deleted file mode 100644 index a1941a056b333efa5f9878082e0e19759b0eda9f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/4017ff46-f389-4024-be9c-4360b0b6e64c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1/1762652580.2908769", - "retrieved_timestamp": "1762652580.290878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4302218114602588 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5157097379648965 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43315624999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36627327127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/6bef1092-ece2-4aeb-8dbe-0e1a02c95f2f.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/6bef1092-ece2-4aeb-8dbe-0e1a02c95f2f.json deleted file mode 100644 index 0f91aeaf19424128943f546efc77fe3f9862d539..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/6bef1092-ece2-4aeb-8dbe-0e1a02c95f2f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01/1762652580.2910998", - "retrieved_timestamp": "1762652580.291101", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2789963962286732 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48611535229340735 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5150104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3304521276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/872cddea-7a06-4b80-9243-423bf49c222c.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/872cddea-7a06-4b80-9243-423bf49c222c.json deleted file mode 100644 index c700c8a9de25a9d71edd8fda3f6d84a8ab0aa032..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/872cddea-7a06-4b80-9243-423bf49c222c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1/1762652580.291321", - "retrieved_timestamp": "1762652580.291322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4222784434190171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5153764046315631 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4384270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3650265957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/9dfd4a1b-fa18-4d54-a7bd-a519f87b532b.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/9dfd4a1b-fa18-4d54-a7bd-a519f87b532b.json deleted file mode 100644 index a14a1fe4a7e5c3948d552c4717c5ffa5279e8c04..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/9dfd4a1b-fa18-4d54-a7bd-a519f87b532b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01/1762652580.291548", - "retrieved_timestamp": "1762652580.291548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4358923212631374 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5040935986635269 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45315625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3762466755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/184a8906-d998-4e03-bf6f-f66ca904a7b7.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/184a8906-d998-4e03-bf6f-f66ca904a7b7.json deleted file mode 100644 index 2f62582bf6fe4cd5f6f50362a8eb4b4e1a054b9c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/184a8906-d998-4e03-bf6f-f66ca904a7b7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1/1762652580.291779", - "retrieved_timestamp": "1762652580.29178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4201551882338861 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.501124270710985 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41502083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699301861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/11f14586-5f0c-4e0b-b41e-f3e0f298b781.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/11f14586-5f0c-4e0b-b41e-f3e0f298b781.json deleted file mode 100644 index 88d236121a4f44f5539f808f8013e1aa223a792b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/11f14586-5f0c-4e0b-b41e-f3e0f298b781.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01/1762652580.292005", - "retrieved_timestamp": "1762652580.2920058", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35178659290682057 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49985217584312186 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48710416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3611203457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/3b9966ca-8157-4f32-b276-9d36dd1045e1.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/3b9966ca-8157-4f32-b276-9d36dd1045e1.json deleted file mode 100644 index 1e2fa1dedf61073d38f1507b97d46629001c5960..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/3b9966ca-8157-4f32-b276-9d36dd1045e1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1/1762652580.2922251", - "retrieved_timestamp": "1762652580.292226", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42038014689911657 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5107301269172088 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42785416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37101063829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/414c1eec-86bc-4d86-a014-2ea586eebfb1.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/414c1eec-86bc-4d86-a014-2ea586eebfb1.json deleted file mode 100644 index 
154bdbf07467a571a0b201da1bee225bd7b98f93..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/414c1eec-86bc-4d86-a014-2ea586eebfb1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01/1762652580.292447", - "retrieved_timestamp": "1762652580.292447", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34541682735142754 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4983827321097329 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49113541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3531416223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/c9e8c1d4-c031-4f90-a14b-30633e75f2c3.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/c9e8c1d4-c031-4f90-a14b-30633e75f2c3.json deleted file mode 100644 index 6cfb4e72522bb0959f0f4985a65f90a5e6f5973c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/c9e8c1d4-c031-4f90-a14b-30633e75f2c3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1/1762652580.292675", - "retrieved_timestamp": "1762652580.2926762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40916435058976847 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.513665952913411 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43569791666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366938164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/4532b233-abbc-4fbd-ba77-801eb1398361.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/4532b233-abbc-4fbd-ba77-801eb1398361.json deleted file mode 100644 index 420903c9b3e80938cd22c20f3105e9d91037ea83..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/4532b233-abbc-4fbd-ba77-801eb1398361.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01/1762652580.292904", - "retrieved_timestamp": "1762652580.2929049", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29038728351884113 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4967337534367295 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4990729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34898603723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/d5916658-91c3-418f-9cd6-c49dcc8927a3.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/d5916658-91c3-418f-9cd6-c49dcc8927a3.json deleted file mode 100644 index 2a22ca815d412f17633be415a69459501f856d90..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/d5916658-91c3-418f-9cd6-c49dcc8927a3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1/1762652580.2931998", - "retrieved_timestamp": "1762652580.293205", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": 
"johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41988036188424493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5146905664948336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43576041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615359042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/a29cab83-e937-4a2a-a9fd-986fd1c67e03.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/a29cab83-e937-4a2a-a9fd-986fd1c67e03.json deleted file mode 100644 index 653884b6543c8c1ddba7cbb74029d5a2512048bd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/a29cab83-e937-4a2a-a9fd-986fd1c67e03.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01/1762652580.293625", - "retrieved_timestamp": "1762652580.293626", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29131149793658606 - } - 
}, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49182964384768835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4976770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34541223404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/2aae97a9-6d0a-438d-9f74-e7a30e85face.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/2aae97a9-6d0a-438d-9f74-e7a30e85face.json deleted file mode 100644 index 48b0a64babaa33022bf7bd3a8b2ee27642b62d8f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/2aae97a9-6d0a-438d-9f74-e7a30e85face.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1/1762652580.293948", - "retrieved_timestamp": "1762652580.293949", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41623337189767595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5138610942606995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43172916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624501329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_dare_linear/060fe548-f690-4492-9c0f-ada0210b0386.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_dare_linear/060fe548-f690-4492-9c0f-ada0210b0386.json deleted file mode 100644 index 6b0fa74ce3a76b0cd0bd220aa289e565bc57a036..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_dare_linear/060fe548-f690-4492-9c0f-ada0210b0386.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_linear/1762652580.294196", - "retrieved_timestamp": "1762652580.294197", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_dare_linear", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_dare_linear" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21454961723781787 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282807940700452 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49792708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24143949468085107 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.1/7d709f22-c4e8-4903-b924-a86728dcf26b.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.1/7d709f22-c4e8-4903-b924-a86728dcf26b.json deleted file mode 100644 index 4f699a3aa88a90fe3fc93765e16dd51bd0ad285e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.1/7d709f22-c4e8-4903-b924-a86728dcf26b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.1/1762652580.2944481", - "retrieved_timestamp": "1762652580.2944489", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18907055501624578 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41187360174735804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46580208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22647938829787234 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.3/c45c03dd-efbe-4c86-a07d-e7831210e017.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.3/c45c03dd-efbe-4c86-a07d-e7831210e017.json deleted file mode 100644 index fa5bbb293abd49d4955901a407cc964eb4c6cb1d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.3/c45c03dd-efbe-4c86-a07d-e7831210e017.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.3/1762652580.294691", - "retrieved_timestamp": "1762652580.294692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21132705665412216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4558569854124363 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5069479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30402260638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.7/3b51b346-a23c-4add-9623-86c9591eddd0.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.7/3b51b346-a23c-4add-9623-86c9591eddd0.json deleted file mode 100644 index 
ca2a9ca868b955bd701d248354aa41dc4526dc65..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.7/3b51b346-a23c-4add-9623-86c9591eddd0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.7/1762652580.2949278", - "retrieved_timestamp": "1762652580.2949288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20338368861288048 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4722858888388635 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3148271276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.9/35557106-88b1-4f6a-bf33-17ea6744f208.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.9/35557106-88b1-4f6a-bf33-17ea6744f208.json deleted file mode 100644 index dd1b2c701045a8f65d2d5f89775efe89a6a2f099..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.9/35557106-88b1-4f6a-bf33-17ea6744f208.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_dare_ties-density-0.9/1762652580.29516", - "retrieved_timestamp": "1762652580.295161", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21607335203925582 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46639610671811504 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5230416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3143284574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_linear/89b55a5a-8f83-4a87-906a-32c1e84b8220.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_linear/89b55a5a-8f83-4a87-906a-32c1e84b8220.json deleted file mode 100644 index 52c970e910b6145cb403cc6479f8a363c077518c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_linear/89b55a5a-8f83-4a87-906a-32c1e84b8220.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_linear/1762652580.295396", - "retrieved_timestamp": "1762652580.295396", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_linear", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_linear" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4308213318439518 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5031496839210309 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10045317220543806 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40971874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37117686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_ties-density-0.1/ec8e412e-96e8-43ae-98e1-f605228f3f6d.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_ties-density-0.1/ec8e412e-96e8-43ae-98e1-f605228f3f6d.json deleted file mode 100644 index 93af6526f0d232ab49e0f507f217e6ad3cd02209..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_ties-density-0.1/ec8e412e-96e8-43ae-98e1-f605228f3f6d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.1/1762652580.295634", - "retrieved_timestamp": "1762652580.295635", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.1", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41161229980895137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5021445196013956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36003989361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_ties-density-0.3/29b19ca6-ec5f-4ef1-9721-cb2199661873.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_ties-density-0.3/29b19ca6-ec5f-4ef1-9721-cb2199661873.json deleted file mode 100644 index 9c2ab58568ac8a9a3eb68bc79d94472baca3c900..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_ties-density-0.3/29b19ca6-ec5f-4ef1-9721-cb2199661873.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.3/1762652580.29586", - "retrieved_timestamp": "1762652580.295861", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.3", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3626278274977061 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49061122520005807 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40248958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33211436170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_ties-density-0.5/12f38eb7-57be-45c6-a53a-9d4859413e94.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_ties-density-0.5/12f38eb7-57be-45c6-a53a-9d4859413e94.json deleted file mode 100644 index cb1cde62a38dd8b58124a576577ab6dd05fbf6b6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_ties-density-0.5/12f38eb7-57be-45c6-a53a-9d4859413e94.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.5/1762652580.2960892", - "retrieved_timestamp": "1762652580.2960901", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.5", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37966373666316483 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47931248948849836 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3879791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31748670212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_ties-density-0.7/22ae576f-6bec-450f-812f-4315779be0a1.json b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_ties-density-0.7/22ae576f-6bec-450f-812f-4315779be0a1.json deleted file mode 100644 index b8f36b265f33d78d911d2400b32698e405e8cd81..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_ties-density-0.7/22ae576f-6bec-450f-812f-4315779be0a1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.7/1762652580.296313", - "retrieved_timestamp": "1762652580.296314", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.7", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681232463197649 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4738186124296502 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3880729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3152426861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_ties-density-0.9/76c364c1-1e67-4536-8f23-85f84f0cd554.json 
b/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_ties-density-0.9/76c364c1-1e67-4536-8f23-85f84f0cd554.json deleted file mode 100644 index f43ca3ab29bbd49ac328214d2e955a93a8c4e808..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/johnsutor/johnsutor_Llama-3-8B-Instruct_ties-density-0.9/76c364c1-1e67-4536-8f23-85f84f0cd554.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/johnsutor_Llama-3-8B-Instruct_ties-density-0.9/1762652580.296535", - "retrieved_timestamp": "1762652580.296536", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "johnsutor/Llama-3-8B-Instruct_ties-density-0.9", - "developer": "johnsutor", - "inference_platform": "unknown", - "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.9" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3858085435533274 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47354321136013144 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3880416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3181515957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-14B-Instruct-4k-DPO/fe0cfe19-b019-459e-a71d-46d55612a95e.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-14B-Instruct-4k-DPO/fe0cfe19-b019-459e-a71d-46d55612a95e.json deleted file mode 100644 index c4f4403eacad1553ac556ba336b6d5faaaf89a0e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-14B-Instruct-4k-DPO/fe0cfe19-b019-459e-a71d-46d55612a95e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/jpacifico_Chocolatine-14B-Instruct-4k-DPO/1762652580.296761", - "retrieved_timestamp": "1762652580.2967622", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jpacifico/Chocolatine-14B-Instruct-4k-DPO", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-14B-Instruct-4k-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4688648341954902 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6299582409761587 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44388541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4763962765957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-14B-Instruct-DPO-v1.2/aae9e150-7992-4241-91af-0c55d03d709f.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-14B-Instruct-DPO-v1.2/aae9e150-7992-4241-91af-0c55d03d709f.json deleted file mode 100644 index 7684266adbea697048d8d28efe90a7b1f7baaf53..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-14B-Instruct-DPO-v1.2/aae9e150-7992-4241-91af-0c55d03d709f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-14B-Instruct-DPO-v1.2/1762652580.297051", - "retrieved_timestamp": "1762652580.297052", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"jpacifico/Chocolatine-14B-Instruct-DPO-v1.2", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6852107962428579 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6438408959901142 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20921450151057402 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4267708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46966422872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-14B-Instruct-DPO-v1.3/b56c681a-592f-491a-aa0a-030848356563.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-14B-Instruct-DPO-v1.3/b56c681a-592f-491a-aa0a-030848356563.json deleted file mode 100644 index b38e20ba37a6c2227c15f37bcebe680594898e6d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-14B-Instruct-DPO-v1.3/b56c681a-592f-491a-aa0a-030848356563.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-14B-Instruct-DPO-v1.3/1762652580.2973812", - "retrieved_timestamp": "1762652580.297384", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.3", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.703995398874985 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6846125547592651 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5619335347432024 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42339583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5374002659574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-DPO-v2.0b1/9ae740a8-6d7c-438c-942f-11ac0f6cbe79.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-DPO-v2.0b1/9ae740a8-6d7c-438c-942f-11ac0f6cbe79.json deleted file mode 100644 index 7d0823b7a4d03dc0092bf1af4866220dc1c1a1d6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-DPO-v2.0b1/9ae740a8-6d7c-438c-942f-11ac0f6cbe79.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-DPO-v2.0b1/1762652580.2977622", - "retrieved_timestamp": "1762652580.297763", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10334024831890495 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.669567432054888 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2756797583081571 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44673958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5123836436170213 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-v2.0.1/c68ca8a7-07d8-4295-a535-a573fc3893b7.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-v2.0.1/c68ca8a7-07d8-4295-a535-a573fc3893b7.json deleted file mode 100644 index 64d3eafb0eff24bc5016cf442ec4747c53a19e5e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-v2.0.1/c68ca8a7-07d8-4295-a535-a573fc3893b7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0.1/1762652580.298285", - "retrieved_timestamp": "1762652580.2982872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.1", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07421419611076388 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6736278064166185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.479607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39177852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50075 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5299202127659575 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-v2.0.3/ccf2d437-d3e3-4a53-9249-e6df2fd04f49.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-v2.0.3/ccf2d437-d3e3-4a53-9249-e6df2fd04f49.json deleted file mode 100644 index 3795ac4b346a640a2cfc914777ab66cf7f36a604..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-v2.0.3/ccf2d437-d3e3-4a53-9249-e6df2fd04f49.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0.3/1762652580.298579", - "retrieved_timestamp": "1762652580.29858", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.3", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7037205725253439 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6548026688308357 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206948640483384 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47681250000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5374002659574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-v2.0/85b8aede-7eb3-4997-9529-2f7d4603fb9e.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-v2.0/85b8aede-7eb3-4997-9529-2f7d4603fb9e.json deleted file mode 100644 index 6982b36b535dc2e3c9db6bfca59bf94621507c8e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-v2.0/85b8aede-7eb3-4997-9529-2f7d4603fb9e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0/1762652580.2980192", - "retrieved_timestamp": "1762652580.2980192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0885273297073986 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6769929749559443 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5021145833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5301695478723404 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-v2.0b2/6837502d-0f08-48d8-b85e-70f3e07a2531.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-v2.0b2/6837502d-0f08-48d8-b85e-70f3e07a2531.json deleted file mode 100644 index 0cc5f3cf493e5a59e3ea16a200a7ee9b9003bc42..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-v2.0b2/6837502d-0f08-48d8-b85e-70f3e07a2531.json +++ /dev/null @@ -1,107 +0,0 @@ 
-{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0b2/1762652580.298837", - "retrieved_timestamp": "1762652580.298838", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b2", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7240787776433197 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6475822300543483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3950151057401813 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48075 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5369015957446809 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-v2.0b3/f345f9cb-7233-4f4e-8e8b-a0b607502d1d.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-v2.0b3/f345f9cb-7233-4f4e-8e8b-a0b607502d1d.json deleted file mode 100644 index c57b88ce391c57c0658684bf5fa5499f4b72285c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-2-14B-Instruct-v2.0b3/f345f9cb-7233-4f4e-8e8b-a0b607502d1d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-2-14B-Instruct-v2.0b3/1762652580.2990808", - "retrieved_timestamp": "1762652580.299082", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - 
"model_info": { - "name": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b3", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7322969720342026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.646878884179919 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4108761329305136 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47811458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5337433510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-3B-Instruct-DPO-Revised/08a646ba-9b4a-483e-8adf-f4e203a9be5d.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-3B-Instruct-DPO-Revised/08a646ba-9b4a-483e-8adf-f4e203a9be5d.json deleted file mode 100644 index 6245567dd90ce4f65183e3241527cd4824a78cab..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-3B-Instruct-DPO-Revised/08a646ba-9b4a-483e-8adf-f4e203a9be5d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-3B-Instruct-DPO-Revised/1762652580.299312", - "retrieved_timestamp": "1762652580.299314", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jpacifico/Chocolatine-3B-Instruct-DPO-Revised", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-3B-Instruct-DPO-Revised" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5622625744136669 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5539982344792619 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44534375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3988530585106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-3B-Instruct-DPO-v1.0/7f969b69-cb14-4291-a15f-60f2b56e23ad.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-3B-Instruct-DPO-v1.0/7f969b69-cb14-4291-a15f-60f2b56e23ad.json deleted file mode 100644 index ab93a7a1be5b5dcd2495dc7558b4173f827eba7f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-3B-Instruct-DPO-v1.0/7f969b69-cb14-4291-a15f-60f2b56e23ad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-3B-Instruct-DPO-v1.0/1762652580.29967", - "retrieved_timestamp": "1762652580.299671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.0", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3737184005106451 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5471398082537478 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4754791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3937001329787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-3B-Instruct-DPO-v1.2/f34988e6-20f5-4d77-9233-70d5bc6193fb.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-3B-Instruct-DPO-v1.2/f34988e6-20f5-4d77-9233-70d5bc6193fb.json deleted file mode 100644 index 89805e5f3e5a5aeaea8a87ff4ca461ca303b3a83..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Chocolatine-3B-Instruct-DPO-v1.2/f34988e6-20f5-4d77-9233-70d5bc6193fb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Chocolatine-3B-Instruct-DPO-v1.2/1762652580.300061", - "retrieved_timestamp": "1762652580.300063", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.2", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5455014915978493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5487182027245813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20468277945619334 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41542708333333334 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3877160904255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Distilucie-7B-Math-Instruct-DPO-v0.1/8ea866ce-c4a8-4981-b221-ee7b2dc898cd.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Distilucie-7B-Math-Instruct-DPO-v0.1/8ea866ce-c4a8-4981-b221-ee7b2dc898cd.json deleted file mode 100644 index b3b67bd77c121d772e147028216a908803a74450..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Distilucie-7B-Math-Instruct-DPO-v0.1/8ea866ce-c4a8-4981-b221-ee7b2dc898cd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Distilucie-7B-Math-Instruct-DPO-v0.1/1762652580.300392", - "retrieved_timestamp": "1762652580.3003929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30475028479988653 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38346961466103785 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3644479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1809341755319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Lucie-7B-Instruct-DPO-v1.1.3/643a510c-b9f4-4222-a1b0-09d7d5434de8.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Lucie-7B-Instruct-DPO-v1.1.3/643a510c-b9f4-4222-a1b0-09d7d5434de8.json deleted file mode 100644 index de205c889c1f9d3043f1ce8f56a66c637b0f829c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Lucie-7B-Instruct-DPO-v1.1.3/643a510c-b9f4-4222-a1b0-09d7d5434de8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-DPO-v1.1.3/1762652580.3010209", - "retrieved_timestamp": "1762652580.301022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jpacifico/Lucie-7B-Instruct-DPO-v1.1.3", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Lucie-7B-Instruct-DPO-v1.1.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3044754584502453 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.381900181819828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38178124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1763630319148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Lucie-7B-Instruct-DPO-v1.1/ad0aa0da-dac4-42a9-ae62-ebe03aa40643.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Lucie-7B-Instruct-DPO-v1.1/ad0aa0da-dac4-42a9-ae62-ebe03aa40643.json deleted file mode 100644 index a458218197984529afa5b36be59da691bbee901d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Lucie-7B-Instruct-DPO-v1.1/ad0aa0da-dac4-42a9-ae62-ebe03aa40643.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-DPO-v1.1/1762652580.300676", - "retrieved_timestamp": "1762652580.300677", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jpacifico/Lucie-7B-Instruct-DPO-v1.1", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Lucie-7B-Instruct-DPO-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31209413245743517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37810118011411814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40159374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18375997340425532 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Lucie-7B-Instruct-Merged-Model_Stock-v1.0/f28fc4d7-d3eb-4915-967a-db97667e85bc.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Lucie-7B-Instruct-Merged-Model_Stock-v1.0/f28fc4d7-d3eb-4915-967a-db97667e85bc.json deleted file mode 100644 index af6df202e146d2d042eab3b631bbb6c1c5a7dab2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Lucie-7B-Instruct-Merged-Model_Stock-v1.0/f28fc4d7-d3eb-4915-967a-db97667e85bc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-Merged-Model_Stock-v1.0/1762652580.3014882", - "retrieved_timestamp": "1762652580.3014889", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": 
{ - "name": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32335979645119395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3802022135816421 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38438541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1870844414893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Lucie-7B-Instruct-Merged-Model_Stock-v1.1/03e7b19a-c31a-4bd4-8560-3b8ac4c7c80c.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Lucie-7B-Instruct-Merged-Model_Stock-v1.1/03e7b19a-c31a-4bd4-8560-3b8ac4c7c80c.json deleted file mode 100644 index 8e1368179e7e4ebe2cdba5b7b2a25bd55f806a52..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Lucie-7B-Instruct-Merged-Model_Stock-v1.1/03e7b19a-c31a-4bd4-8560-3b8ac4c7c80c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-7B-Instruct-Merged-Model_Stock-v1.1/1762652580.301858", - "retrieved_timestamp": "1762652580.3018591", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.30142798884736943 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38078615414710804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37502083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18617021276595744 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Lucie-Boosted-7B-Instruct/4c7575d2-d538-4767-8d7e-d905b11f84f9.json b/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Lucie-Boosted-7B-Instruct/4c7575d2-d538-4767-8d7e-d905b11f84f9.json deleted file mode 100644 index 014694a68b9db2eaedc832a19e9a0c90540722cd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jpacifico/jpacifico_Lucie-Boosted-7B-Instruct/4c7575d2-d538-4767-8d7e-d905b11f84f9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jpacifico_Lucie-Boosted-7B-Instruct/1762652580.302166", - "retrieved_timestamp": "1762652580.3021681", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jpacifico/Lucie-Boosted-7B-Instruct", - "developer": "jpacifico", - "inference_platform": "unknown", - "id": "jpacifico/Lucie-Boosted-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25661467129438775 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34654827210803724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } 
- }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.369875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1629820478723404 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.707 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jsfs11/jsfs11_L3-8B-Stheno-slerp/4148a653-5fda-41c2-bf7e-1c03d385b7a1.json b/leaderboard_data/HFOpenLLMv2/jsfs11/jsfs11_L3-8B-Stheno-slerp/4148a653-5fda-41c2-bf7e-1c03d385b7a1.json deleted file mode 100644 index 3542b6181f2457ca2bf4295f22ae1dcbb6f64af5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jsfs11/jsfs11_L3-8B-Stheno-slerp/4148a653-5fda-41c2-bf7e-1c03d385b7a1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jsfs11_L3-8B-Stheno-slerp/1762652580.302513", - "retrieved_timestamp": "1762652580.302515", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jsfs11/L3-8B-Stheno-slerp", - "developer": "jsfs11", - "inference_platform": "unknown", - "id": "jsfs11/L3-8B-Stheno-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6751940407008958 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5325675903618755 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3725416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36494348404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jsfs11/jsfs11_MixtureofMerges-MoE-4x7b-v4/8143abf5-bd1d-4cdd-b555-5135f04945c3.json b/leaderboard_data/HFOpenLLMv2/jsfs11/jsfs11_MixtureofMerges-MoE-4x7b-v4/8143abf5-bd1d-4cdd-b555-5135f04945c3.json deleted file mode 100644 index 112674ce24b933dd95dc88e340bb3e6398d14d19..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jsfs11/jsfs11_MixtureofMerges-MoE-4x7b-v4/8143abf5-bd1d-4cdd-b555-5135f04945c3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jsfs11_MixtureofMerges-MoE-4x7b-v4/1762652580.302909", - "retrieved_timestamp": "1762652580.3029099", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jsfs11/MixtureofMerges-MoE-4x7b-v4", - "developer": "jsfs11", - "inference_platform": "unknown", - "id": "jsfs11/MixtureofMerges-MoE-4x7b-v4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40299405577201824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5169007103786006 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43855208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30319148936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/jsfs11/jsfs11_MixtureofMerges-MoE-4x7b-v5/a452af19-e167-45ca-99d2-5def2e4ad774.json b/leaderboard_data/HFOpenLLMv2/jsfs11/jsfs11_MixtureofMerges-MoE-4x7b-v5/a452af19-e167-45ca-99d2-5def2e4ad774.json deleted file mode 100644 
index fcbd3be3cf02c1c897b5d2c90409cfc00a9598df..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/jsfs11/jsfs11_MixtureofMerges-MoE-4x7b-v5/a452af19-e167-45ca-99d2-5def2e4ad774.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jsfs11_MixtureofMerges-MoE-4x7b-v5/1762652580.30316", - "retrieved_timestamp": "1762652580.30316", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jsfs11/MixtureofMerges-MoE-4x7b-v5", - "developer": "jsfs11", - "inference_platform": "unknown", - "id": "jsfs11/MixtureofMerges-MoE-4x7b-v5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41993022956865567 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5198481257083689 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4304895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3097573138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/kaist-ai/kaist-ai_janus-7b/3ab8b78b-a9f9-428c-9469-afaa4158a0a6.json b/leaderboard_data/HFOpenLLMv2/kaist-ai/kaist-ai_janus-7b/3ab8b78b-a9f9-428c-9469-afaa4158a0a6.json deleted file mode 100644 index 0b29fc2c4a4c1f9dc8c7c87ec8746ccaf2d0878c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/kaist-ai/kaist-ai_janus-7b/3ab8b78b-a9f9-428c-9469-afaa4158a0a6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kaist-ai_janus-7b/1762652580.303385", - "retrieved_timestamp": "1762652580.3033862", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kaist-ai/janus-7b", - "developer": "kaist-ai", - "inference_platform": "unknown", - "id": "kaist-ai/janus-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37751499355044615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4693667591541633 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4401041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28740026595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/kaist-ai/kaist-ai_janus-dpo-7b/2a78f22b-d898-4f92-a2a5-c2930c16916c.json b/leaderboard_data/HFOpenLLMv2/kaist-ai/kaist-ai_janus-dpo-7b/2a78f22b-d898-4f92-a2a5-c2930c16916c.json deleted file mode 100644 index 21b29e6d8de4aa60c3ba98104685b4b0e25f19ac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/kaist-ai/kaist-ai_janus-dpo-7b/2a78f22b-d898-4f92-a2a5-c2930c16916c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kaist-ai_janus-dpo-7b/1762652580.303661", - "retrieved_timestamp": "1762652580.303662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kaist-ai/janus-dpo-7b", - "developer": "kaist-ai", - "inference_platform": "unknown", - "id": "kaist-ai/janus-dpo-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4002712802031942 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4772581104894978 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43873958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2976230053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/kaist-ai/kaist-ai_janus-rm-7b/46f57920-759b-4d1a-b2f5-fe66aa740170.json b/leaderboard_data/HFOpenLLMv2/kaist-ai/kaist-ai_janus-rm-7b/46f57920-759b-4d1a-b2f5-fe66aa740170.json deleted file mode 100644 index a9c9a4881fd27a24c9576d5996e75edd44e6645c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/kaist-ai/kaist-ai_janus-rm-7b/46f57920-759b-4d1a-b2f5-fe66aa740170.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kaist-ai_janus-rm-7b/1762652580.303882", - "retrieved_timestamp": "1762652580.303883", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kaist-ai/janus-rm-7b", - "developer": "kaist-ai", - "inference_platform": "unknown", - "id": "kaist-ai/janus-rm-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.177804891022487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3056467446788138 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38829166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11261635638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LLMForSequenceRegression", - "params_billions": 7.111 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/kavonalds/kavonalds_BunderMaxx-0710/10be7d08-18a9-43a6-80ea-81d704600eab.json b/leaderboard_data/HFOpenLLMv2/kavonalds/kavonalds_BunderMaxx-0710/10be7d08-18a9-43a6-80ea-81d704600eab.json deleted file mode 100644 index 46c83b12410b6fe7c96eec1823ee500eecca1f67..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/kavonalds/kavonalds_BunderMaxx-0710/10be7d08-18a9-43a6-80ea-81d704600eab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kavonalds_BunderMaxx-0710/1762652580.304877", - "retrieved_timestamp": "1762652580.3048792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kavonalds/BunderMaxx-0710", - "developer": "kavonalds", - "inference_platform": "unknown", - "id": "kavonalds/BunderMaxx-0710" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27007894608527594 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.556586279503196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1449468085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/kavonalds/kavonalds_BunderMaxx-0710/63d646bf-14d2-4cc7-ab82-efd1645cc1ba.json b/leaderboard_data/HFOpenLLMv2/kavonalds/kavonalds_BunderMaxx-0710/63d646bf-14d2-4cc7-ab82-efd1645cc1ba.json deleted file mode 100644 index 82ac95adc10d99fb3dee4cde73ae0e7724febf4b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/kavonalds/kavonalds_BunderMaxx-0710/63d646bf-14d2-4cc7-ab82-efd1645cc1ba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kavonalds_BunderMaxx-0710/1762652580.3044312", - "retrieved_timestamp": "1762652580.3044322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kavonalds/BunderMaxx-0710", - "developer": "kavonalds", - "inference_platform": "unknown", - "id": "kavonalds/BunderMaxx-0710" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32825569488955975 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6650758850169982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3393333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13139960106382978 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/kavonalds/kavonalds_BunderMaxx-1010/6b0275ea-f2eb-4a37-922c-d1f734c1a6d3.json b/leaderboard_data/HFOpenLLMv2/kavonalds/kavonalds_BunderMaxx-1010/6b0275ea-f2eb-4a37-922c-d1f734c1a6d3.json deleted file mode 100644 index e0023f78b8d323f62c5b5c8f7f7b412adb108533..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/kavonalds/kavonalds_BunderMaxx-1010/6b0275ea-f2eb-4a37-922c-d1f734c1a6d3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/kavonalds_BunderMaxx-1010/1762652580.305197", - "retrieved_timestamp": "1762652580.3051982", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kavonalds/BunderMaxx-1010", - "developer": "kavonalds", - "inference_platform": "unknown", - "id": "kavonalds/BunderMaxx-1010" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2980558252104416 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7019840419971701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3484479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12242353723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/kavonalds/kavonalds_Lancer-1-1b-Instruct/ae2afa83-4607-43ea-be11-86cc57f3b848.json b/leaderboard_data/HFOpenLLMv2/kavonalds/kavonalds_Lancer-1-1b-Instruct/ae2afa83-4607-43ea-be11-86cc57f3b848.json deleted file mode 100644 index 0f6bfe2fc4ce960f50a49ae770f38240a215a46c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/kavonalds/kavonalds_Lancer-1-1b-Instruct/ae2afa83-4607-43ea-be11-86cc57f3b848.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kavonalds_Lancer-1-1b-Instruct/1762652580.305463", - "retrieved_timestamp": "1762652580.305465", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kavonalds/Lancer-1-1b-Instruct", - "developer": "kavonalds", - "inference_platform": "unknown", - "id": 
"kavonalds/Lancer-1-1b-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5545940327220664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32532742727549835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3144375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1568317819148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/kekmodel/kekmodel_StopCarbon-10.7B-v5/25b7d35b-8b5f-44ac-afae-e0f71ba8a0ff.json b/leaderboard_data/HFOpenLLMv2/kekmodel/kekmodel_StopCarbon-10.7B-v5/25b7d35b-8b5f-44ac-afae-e0f71ba8a0ff.json deleted file mode 100644 index 2ed43dd173ec0bca32611c783559124b38e741d0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/kekmodel/kekmodel_StopCarbon-10.7B-v5/25b7d35b-8b5f-44ac-afae-e0f71ba8a0ff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kekmodel_StopCarbon-10.7B-v5/1762652580.306321", - "retrieved_timestamp": "1762652580.3063219", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kekmodel/StopCarbon-10.7B-v5", - "developer": "kekmodel", - "inference_platform": "unknown", - "id": "kekmodel/StopCarbon-10.7B-v5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47283651821611106 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5177716413471513 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4019375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3156582446808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/khoantap/khoantap_cheap-moe-merge/9ef977af-b10c-4434-bf4c-9783903e75a9.json b/leaderboard_data/HFOpenLLMv2/khoantap/khoantap_cheap-moe-merge/9ef977af-b10c-4434-bf4c-9783903e75a9.json deleted file mode 100644 index b742e29b327acd59153fa58b4af21df1d1584fbc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/khoantap/khoantap_cheap-moe-merge/9ef977af-b10c-4434-bf4c-9783903e75a9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/khoantap_cheap-moe-merge/1762652580.3070369", - "retrieved_timestamp": "1762652580.307038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "khoantap/cheap-moe-merge", - "developer": "khoantap", - "inference_platform": "unknown", - "id": "khoantap/cheap-moe-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4557008736818309 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.513116897226939 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4103020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338597074468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 19.305 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/khoantap/khoantap_moe-out-merge/326fc05a-78e9-4e36-933c-aa0219661e0d.json b/leaderboard_data/HFOpenLLMv2/khoantap/khoantap_moe-out-merge/326fc05a-78e9-4e36-933c-aa0219661e0d.json deleted file mode 100644 index 205b3a59c5425478f78c22e975f8efe01e9af742..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/khoantap/khoantap_moe-out-merge/326fc05a-78e9-4e36-933c-aa0219661e0d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/khoantap_moe-out-merge/1762652580.309191", - "retrieved_timestamp": "1762652580.309192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "khoantap/moe-out-merge", - "developer": "khoantap", - "inference_platform": "unknown", - "id": "khoantap/moe-out-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4504802812094133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.515116897226939 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40630208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347739361702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2MoeForCausalLM", - "params_billions": 19.305 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/kms7530/kms7530_chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/6cb03909-9850-4519-9e67-f2d875652e02.json b/leaderboard_data/HFOpenLLMv2/kms7530/kms7530_chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/6cb03909-9850-4519-9e67-f2d875652e02.json deleted file mode 100644 index f80fc87313abae49cb494d3f5b37bb0ec490e063..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/kms7530/kms7530_chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/6cb03909-9850-4519-9e67-f2d875652e02.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kms7530_chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1/1762652580.309702", - "retrieved_timestamp": "1762652580.3097029", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1", - "developer": "kms7530", - "inference_platform": "unknown", - "id": "kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5455014915978493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42890394469736065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38206249999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2798371010638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 9.3 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/kms7530/kms7530_chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/51a11592-e099-4059-9e97-f8924e1c2437.json b/leaderboard_data/HFOpenLLMv2/kms7530/kms7530_chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/51a11592-e099-4059-9e97-f8924e1c2437.json deleted file mode 100644 index b800cd43778e984245b409a1f5a7e500d1c65829..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/kms7530/kms7530_chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/51a11592-e099-4059-9e97-f8924e1c2437.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kms7530_chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath/1762652580.309973", - "retrieved_timestamp": "1762652580.309974", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath", - "developer": "kms7530", - "inference_platform": "unknown", - "id": "kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4863251727638222 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49871846432893613 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39828125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3480718085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 4.132 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/kno10/kno10_ende-chat-0.0.5/af2f11cf-8efa-4c71-a0b2-74f953b8e61b.json b/leaderboard_data/HFOpenLLMv2/kno10/kno10_ende-chat-0.0.5/af2f11cf-8efa-4c71-a0b2-74f953b8e61b.json deleted file mode 100644 index 7feb305837e700b7e18d62093e685497ddc6361e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/kno10/kno10_ende-chat-0.0.5/af2f11cf-8efa-4c71-a0b2-74f953b8e61b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kno10_ende-chat-0.0.5/1762652580.310679", - "retrieved_timestamp": "1762652580.3106802", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kno10/ende-chat-0.0.5", - "developer": "kno10", - "inference_platform": "unknown", - "id": "kno10/ende-chat-0.0.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3404455733010634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3604365707523862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17902260638297873 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.891 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/kno10/kno10_ende-chat-0.0.7/6619dec7-71cf-4be6-90e2-815e8dd4e56f.json b/leaderboard_data/HFOpenLLMv2/kno10/kno10_ende-chat-0.0.7/6619dec7-71cf-4be6-90e2-815e8dd4e56f.json deleted file mode 100644 index a575c8d710fe073f06cf1fd697894d479bfa7f3d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/kno10/kno10_ende-chat-0.0.7/6619dec7-71cf-4be6-90e2-815e8dd4e56f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kno10_ende-chat-0.0.7/1762652580.310943", - "retrieved_timestamp": "1762652580.310944", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kno10/ende-chat-0.0.7", - "developer": "kno10", - "inference_platform": "unknown", - "id": "kno10/ende-chat-0.0.7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.440063476021401 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37918745577624335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.386125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19664228723404256 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.891 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/kyutai/kyutai_helium-1-preview-2b/ce4ddb86-646e-4c59-8a03-3687dbb77021.json b/leaderboard_data/HFOpenLLMv2/kyutai/kyutai_helium-1-preview-2b/ce4ddb86-646e-4c59-8a03-3687dbb77021.json deleted file mode 100644 index 6d75b66d454200e4ff832516cefe77f0fb465040..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/kyutai/kyutai_helium-1-preview-2b/ce4ddb86-646e-4c59-8a03-3687dbb77021.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kyutai_helium-1-preview-2b/1762652580.3111548", - "retrieved_timestamp": "1762652580.3111548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kyutai/helium-1-preview-2b", - "developer": "kyutai", - "inference_platform": "unknown", - "id": "kyutai/helium-1-preview-2b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26136096667952147 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3638164815956466 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3549583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18725066489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "HeliumForCausalLM", - "params_billions": 2.173 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/kz919/kz919_QwQ-0.5B-Distilled-SFT/08efd69e-6ff6-48a1-b260-ddbb4a942d12.json b/leaderboard_data/HFOpenLLMv2/kz919/kz919_QwQ-0.5B-Distilled-SFT/08efd69e-6ff6-48a1-b260-ddbb4a942d12.json deleted file mode 100644 index 273a2c47d781bf4a81b415a1da1a1b6d8fd46513..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/kz919/kz919_QwQ-0.5B-Distilled-SFT/08efd69e-6ff6-48a1-b260-ddbb4a942d12.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kz919_QwQ-0.5B-Distilled-SFT/1762652580.311408", - "retrieved_timestamp": "1762652580.311409", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kz919/QwQ-0.5B-Distilled-SFT", - "developer": "kz919", - "inference_platform": "unknown", - "id": "kz919/QwQ-0.5B-Distilled-SFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3076725311063534 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3256291569645335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3408541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15874335106382978 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ladydaina/ladydaina_ECE-FDF/737cda34-7dea-4c68-b6a3-5b10066f9241.json b/leaderboard_data/HFOpenLLMv2/ladydaina/ladydaina_ECE-FDF/737cda34-7dea-4c68-b6a3-5b10066f9241.json deleted file mode 100644 index 82ea871a48e1760d06c39705b85397b0851f9c2b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ladydaina/ladydaina_ECE-FDF/737cda34-7dea-4c68-b6a3-5b10066f9241.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ladydaina_ECE-FDF/1762652580.311657", - "retrieved_timestamp": "1762652580.311657", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ladydaina/ECE-FDF", - "developer": "ladydaina", - "inference_platform": "unknown", - "id": "ladydaina/ECE-FDF" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3728440537773109 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5150177593278346 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45039583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30069813829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-0.5B-FT-V5-MUSR/012fb237-8082-40d9-882e-0dd7bc9c74cb.json b/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-0.5B-FT-V5-MUSR/012fb237-8082-40d9-882e-0dd7bc9c74cb.json deleted file mode 100644 index 9082ddb900dd2c2a377573f9afbacb8dbf255776..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-0.5B-FT-V5-MUSR/012fb237-8082-40d9-882e-0dd7bc9c74cb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-0.5B-FT-V5-MUSR/1762652580.312166", - "retrieved_timestamp": "1762652580.312166", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR", - "developer": "lalainy", - "inference_platform": "unknown", - "id": "lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21377500587330506 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32694393820046386 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15334109042553193 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-0.5B-SLERP-V4/869daca0-a700-464d-a551-290ed454421e.json b/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-0.5B-SLERP-V4/869daca0-a700-464d-a551-290ed454421e.json deleted file mode 100644 index b6ed63cef04bef2afcfd8cf82cc873b28434934e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-0.5B-SLERP-V4/869daca0-a700-464d-a551-290ed454421e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-0.5B-SLERP-V4/1762652580.312417", - "retrieved_timestamp": "1762652580.312417", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lalainy/ECE-PRYMMAL-0.5B-SLERP-V4", - "developer": "lalainy", - 
"inference_platform": "unknown", - "id": "lalainy/ECE-PRYMMAL-0.5B-SLERP-V4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15639724819035714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2894308596288922 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37892708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11685505319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/8822f27f-90ec-41a8-b71a-611f7c5ad590.json b/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/8822f27f-90ec-41a8-b71a-611f7c5ad590.json deleted file mode 100644 index 39c96b6ac2d00cb4139004da0de9df08c64a9897..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/8822f27f-90ec-41a8-b71a-611f7c5ad590.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1/1762652580.31263", - "retrieved_timestamp": "1762652580.31263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1", - "developer": "lalainy", - "inference_platform": "unknown", - "id": "lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1437075847639818 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3031946898842932 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11211768617021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-YL-1B-SLERP-V3/fa3c7a13-b37e-40b3-b814-b1ae421081ba.json b/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-YL-1B-SLERP-V3/fa3c7a13-b37e-40b3-b814-b1ae421081ba.json deleted file mode 100644 index efc826d73b6d0db2036cde8d365d7f81233ed85c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-YL-1B-SLERP-V3/fa3c7a13-b37e-40b3-b814-b1ae421081ba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-1B-SLERP-V3/1762652580.31284", - "retrieved_timestamp": "1762652580.312841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3", - "developer": "lalainy", - "inference_platform": "unknown", - "id": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.325008754549041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42245501886651654 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42128125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2931349734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-YL-1B-SLERP-V4/2ede8e21-33e9-45ac-9c60-9a4bd7e8e3cb.json b/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-YL-1B-SLERP-V4/2ede8e21-33e9-45ac-9c60-9a4bd7e8e3cb.json deleted file mode 100644 index 16db022dfe06a9f9708ef7c9e266eacdd3c6e926..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-YL-1B-SLERP-V4/2ede8e21-33e9-45ac-9c60-9a4bd7e8e3cb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-1B-SLERP-V4/1762652580.3130481", - "retrieved_timestamp": "1762652580.313049", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4", - "developer": "lalainy", - "inference_platform": "unknown", - "id": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33235260220658963 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4170742409015322 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10045317220543806 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4306145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.289311835106383 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-YL-6B-SLERP-V1/85ac95fd-cb36-4158-818d-69c45f83dae9.json b/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-YL-6B-SLERP-V1/85ac95fd-cb36-4158-818d-69c45f83dae9.json deleted file mode 100644 index 847d5c98907933d03cb9029f4adb9e957985ef54..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-YL-6B-SLERP-V1/85ac95fd-cb36-4158-818d-69c45f83dae9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-6B-SLERP-V1/1762652580.31332", - "retrieved_timestamp": "1762652580.3133209", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1", - "developer": "lalainy", - "inference_platform": "unknown", - "id": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3264072660540699 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46293726502592586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48639583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32139295212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-YL-6B-SLERP-V2/fd2e3c0b-8b35-463c-a001-444ed6e6dd9a.json b/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-YL-6B-SLERP-V2/fd2e3c0b-8b35-463c-a001-444ed6e6dd9a.json deleted file mode 100644 index 2f31882a59bf2f59a2fffed74f5c81559900f67a..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/lalainy/lalainy_ECE-PRYMMAL-YL-6B-SLERP-V2/fd2e3c0b-8b35-463c-a001-444ed6e6dd9a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lalainy_ECE-PRYMMAL-YL-6B-SLERP-V2/1762652580.3135412", - "retrieved_timestamp": "1762652580.3135412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2", - "developer": "lalainy", - "inference_platform": "unknown", - "id": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3248835312526319 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46293726502592586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48639583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32139295212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/langgptai/langgptai_qwen1.5-7b-chat-sa-v0.1/36137543-78a7-42a6-ad41-a4121797eec4.json b/leaderboard_data/HFOpenLLMv2/langgptai/langgptai_qwen1.5-7b-chat-sa-v0.1/36137543-78a7-42a6-ad41-a4121797eec4.json deleted file mode 100644 index 840af2daea0098080ab4e2f54931b8f3813bca46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/langgptai/langgptai_qwen1.5-7b-chat-sa-v0.1/36137543-78a7-42a6-ad41-a4121797eec4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/langgptai_qwen1.5-7b-chat-sa-v0.1/1762652580.314067", - "retrieved_timestamp": "1762652580.314068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "langgptai/qwen1.5-7b-chat-sa-v0.1", - "developer": "langgptai", - "inference_platform": "unknown", - "id": "langgptai/qwen1.5-7b-chat-sa-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42677429221133256 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325267992878656 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3551458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29928523936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 15.443 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lars1234/lars1234_Mistral-Small-24B-Instruct-2501-writer/89742249-c51e-48e9-8bf1-7aad55e222c1.json b/leaderboard_data/HFOpenLLMv2/lars1234/lars1234_Mistral-Small-24B-Instruct-2501-writer/89742249-c51e-48e9-8bf1-7aad55e222c1.json deleted file mode 100644 index 2eb6dd3f50322d38e840b74f7e1e28780cf59885..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lars1234/lars1234_Mistral-Small-24B-Instruct-2501-writer/89742249-c51e-48e9-8bf1-7aad55e222c1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lars1234_Mistral-Small-24B-Instruct-2501-writer/1762652580.314311", - "retrieved_timestamp": "1762652580.314312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lars1234/Mistral-Small-24B-Instruct-2501-writer", - "developer": "lars1234", - "inference_platform": "unknown", - "id": "lars1234/Mistral-Small-24B-Instruct-2501-writer" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.6565346613651777 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6733164099871131 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3557401812688822 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46453125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5447972074468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/leafspark/leafspark_Llama-3.1-8B-MultiReflection-Instruct/c8a287fc-db9e-4088-aafe-0562aa305011.json b/leaderboard_data/HFOpenLLMv2/leafspark/leafspark_Llama-3.1-8B-MultiReflection-Instruct/c8a287fc-db9e-4088-aafe-0562aa305011.json deleted file mode 100644 index f7e20526286d6c20065334335d14d2b48f276c61..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/leafspark/leafspark_Llama-3.1-8B-MultiReflection-Instruct/c8a287fc-db9e-4088-aafe-0562aa305011.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/leafspark_Llama-3.1-8B-MultiReflection-Instruct/1762652580.3145778", - "retrieved_timestamp": "1762652580.3145778", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "leafspark/Llama-3.1-8B-MultiReflection-Instruct", - "developer": "leafspark", - "inference_platform": "unknown", - "id": "leafspark/Llama-3.1-8B-MultiReflection-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7125382872999197 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5009088261495708 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37242353723404253 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-EIFFEL-3B/b32f3852-47ce-4ca5-98a0-5e2f166a11e9.json b/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-EIFFEL-3B/b32f3852-47ce-4ca5-98a0-5e2f166a11e9.json deleted file mode 100644 index 03be257dcd731c56eb67dd65cfe38b5abb944a35..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-EIFFEL-3B/b32f3852-47ce-4ca5-98a0-5e2f166a11e9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lesubra_ECE-EIFFEL-3B/1762652580.319232", - "retrieved_timestamp": "1762652580.319233", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lesubra/ECE-EIFFEL-3B", - "developer": "lesubra", - "inference_platform": "unknown", - "id": "lesubra/ECE-EIFFEL-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3469405621528655 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5101583259186949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43622916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3820644946808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-EIFFEL-3Bv2/7e511f3b-7d8e-44c4-ad3f-7f6e66231109.json b/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-EIFFEL-3Bv2/7e511f3b-7d8e-44c4-ad3f-7f6e66231109.json deleted file mode 100644 index fb79e9eba411f52647952ba803cc539b692825e0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-EIFFEL-3Bv2/7e511f3b-7d8e-44c4-ad3f-7f6e66231109.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lesubra_ECE-EIFFEL-3Bv2/1762652580.319594", - "retrieved_timestamp": "1762652580.319595", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lesubra/ECE-EIFFEL-3Bv2", - "developer": "lesubra", - "inference_platform": "unknown", - "id": "lesubra/ECE-EIFFEL-3Bv2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30130276555096036 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5424007873371969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4442916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39993351063829785 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-EIFFEL-3Bv3/317a27cd-9458-4157-a304-0c1e3739d0fb.json b/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-EIFFEL-3Bv3/317a27cd-9458-4157-a304-0c1e3739d0fb.json deleted file mode 100644 index 
f42d21d39d4284982e0501de877f0ca6a5922e38..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-EIFFEL-3Bv3/317a27cd-9458-4157-a304-0c1e3739d0fb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lesubra_ECE-EIFFEL-3Bv3/1762652580.319853", - "retrieved_timestamp": "1762652580.319854", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lesubra/ECE-EIFFEL-3Bv3", - "developer": "lesubra", - "inference_platform": "unknown", - "id": "lesubra/ECE-EIFFEL-3Bv3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3786142989490109 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5469446669064592 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46751041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39752327127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-PRYMMAL-3B-SLERP-V1/6fb1242d-bf20-43e6-acfe-77a88c020eee.json b/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-PRYMMAL-3B-SLERP-V1/6fb1242d-bf20-43e6-acfe-77a88c020eee.json deleted file mode 100644 index 32407a1d1b2e72f05df3a530bfa988577fb7b04e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-PRYMMAL-3B-SLERP-V1/6fb1242d-bf20-43e6-acfe-77a88c020eee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP-V1/1762652580.320159", - "retrieved_timestamp": "1762652580.32016", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lesubra/ECE-PRYMMAL-3B-SLERP-V1", - "developer": "lesubra", - "inference_platform": "unknown", - "id": "lesubra/ECE-PRYMMAL-3B-SLERP-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2932840418977203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5340594627933309 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45951041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3900432180851064 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-PRYMMAL-3B-SLERP-V2/cb14b942-7c2f-489f-bede-d25279ea39ac.json b/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-PRYMMAL-3B-SLERP-V2/cb14b942-7c2f-489f-bede-d25279ea39ac.json deleted file mode 100644 index 7283b66728d56caebb52e461eacec779e7fc7910..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-PRYMMAL-3B-SLERP-V2/cb14b942-7c2f-489f-bede-d25279ea39ac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP-V2/1762652580.320386", - "retrieved_timestamp": "1762652580.3203871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lesubra/ECE-PRYMMAL-3B-SLERP-V2", - "developer": "lesubra", - "inference_platform": "unknown", - "id": "lesubra/ECE-PRYMMAL-3B-SLERP-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2932840418977203 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5340594627933309 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45951041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3900432180851064 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-PRYMMAL-3B-SLERP_2-V1/c6b7d02d-4d2d-43fa-95a8-aa188f38120a.json b/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-PRYMMAL-3B-SLERP_2-V1/c6b7d02d-4d2d-43fa-95a8-aa188f38120a.json deleted file mode 100644 index f45bbb7a0e1a3356b6aed05eef136881866f5f28..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-PRYMMAL-3B-SLERP_2-V1/c6b7d02d-4d2d-43fa-95a8-aa188f38120a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP_2-V1/1762652580.320611", - "retrieved_timestamp": "1762652580.3206122", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V1", - "developer": "lesubra", - "inference_platform": "unknown", - "id": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649006857360692 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5411447467732948 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16767371601208458 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4661458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3990192819148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-PRYMMAL-3B-SLERP_2-V2/653cb458-4616-4325-b377-a79ee4a5d9c6.json b/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-PRYMMAL-3B-SLERP_2-V2/653cb458-4616-4325-b377-a79ee4a5d9c6.json deleted file mode 100644 index 8cf1fe49d78b243c5748c8751b431f43ec01389a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_ECE-PRYMMAL-3B-SLERP_2-V2/653cb458-4616-4325-b377-a79ee4a5d9c6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lesubra_ECE-PRYMMAL-3B-SLERP_2-V2/1762652580.320825", - "retrieved_timestamp": "1762652580.320826", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V2", - "developer": "lesubra", - "inference_platform": "unknown", - "id": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3664244205375071 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5411447467732948 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16767371601208458 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4661458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3990192819148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_merge-test/6f16b360-346a-4299-8f60-fafc0bb8ebcd.json b/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_merge-test/6f16b360-346a-4299-8f60-fafc0bb8ebcd.json deleted file mode 100644 index 70d8ede22e6069a75d9d3ca67c8cdd6b51bb4b8a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lesubra/lesubra_merge-test/6f16b360-346a-4299-8f60-fafc0bb8ebcd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lesubra_merge-test/1762652580.321054", - "retrieved_timestamp": "1762652580.321055", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lesubra/merge-test", - "developer": "lesubra", - "inference_platform": "unknown", - "id": "lesubra/merge-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.538257379309122 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5240434385320306 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44190625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38738364361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lkoenig/lkoenig_BBAI_145_/0f29b1ac-1943-463a-8a79-a4c0ace371cb.json b/leaderboard_data/HFOpenLLMv2/lkoenig/lkoenig_BBAI_145_/0f29b1ac-1943-463a-8a79-a4c0ace371cb.json deleted file mode 100644 index 689cbf795ecd063049bd9c95ae31162bf30821c2..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/lkoenig/lkoenig_BBAI_145_/0f29b1ac-1943-463a-8a79-a4c0ace371cb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_145_/1762652580.322459", - "retrieved_timestamp": "1762652580.32246", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lkoenig/BBAI_145_", - "developer": "lkoenig", - "inference_platform": "unknown", - "id": "lkoenig/BBAI_145_" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44503473007176514 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5567169940219221 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4382083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.448969414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/llnYou/llnYou_ECE-PRYMMAL-YL-1B-SLERP-V5/334bc38a-becd-405b-8982-dfaf5de35c4b.json b/leaderboard_data/HFOpenLLMv2/llnYou/llnYou_ECE-PRYMMAL-YL-1B-SLERP-V5/334bc38a-becd-405b-8982-dfaf5de35c4b.json deleted file mode 100644 index 20f39839c0b37e5b007375412fb6f84e3b34b39b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/llnYou/llnYou_ECE-PRYMMAL-YL-1B-SLERP-V5/334bc38a-becd-405b-8982-dfaf5de35c4b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-1B-SLERP-V5/1762652580.3253949", - "retrieved_timestamp": "1762652580.325396", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - 
}, - "model_info": { - "name": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5", - "developer": "llnYou", - "inference_platform": "unknown", - "id": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33125329680802496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42329545804357255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3868020833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29305186170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/llnYou/llnYou_ECE-PRYMMAL-YL-1B-SLERP-V6/eaa1adca-5379-4aab-bf39-8641df58a530.json b/leaderboard_data/HFOpenLLMv2/llnYou/llnYou_ECE-PRYMMAL-YL-1B-SLERP-V6/eaa1adca-5379-4aab-bf39-8641df58a530.json deleted file mode 100644 index c367e488dbc0c641f5118115c316038ab7f8f0e2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/llnYou/llnYou_ECE-PRYMMAL-YL-1B-SLERP-V6/eaa1adca-5379-4aab-bf39-8641df58a530.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-1B-SLERP-V6/1762652580.325702", - "retrieved_timestamp": "1762652580.325703", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6", - "developer": "llnYou", - "inference_platform": "unknown", - "id": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13876181864120535 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3944027089700251 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39279166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2349567819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.357 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/llnYou/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V1/844c959f-6859-4220-bdd8-99e6af53808b.json b/leaderboard_data/HFOpenLLMv2/llnYou/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V1/844c959f-6859-4220-bdd8-99e6af53808b.json deleted file mode 100644 index 86a6830d8858205b502074b8396496461573828f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/llnYou/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V1/844c959f-6859-4220-bdd8-99e6af53808b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V1/1762652580.325917", - "retrieved_timestamp": "1762652580.325917", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1", - "developer": "llnYou", - "inference_platform": "unknown", - "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23463299600615256 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4018418465179459 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3364479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2849900265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.81 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/llnYou/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V2/2bb16fd8-516f-42d6-91e1-2f3f4024f0d4.json b/leaderboard_data/HFOpenLLMv2/llnYou/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V2/2bb16fd8-516f-42d6-91e1-2f3f4024f0d4.json deleted file mode 100644 index 832d61d83c582333464866bd00ae93f7ca49c271..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/llnYou/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V2/2bb16fd8-516f-42d6-91e1-2f3f4024f0d4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V2/1762652580.326129", - "retrieved_timestamp": "1762652580.326129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2", - "developer": "llnYou", - "inference_platform": "unknown", - "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2309361383351729 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39897709281426197 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3587708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.28997672872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.81 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/llnYou/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V3/183cd87c-2415-4428-9ad1-9d41c0dcdc41.json b/leaderboard_data/HFOpenLLMv2/llnYou/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V3/183cd87c-2415-4428-9ad1-9d41c0dcdc41.json deleted file mode 100644 index 49e14aff074d2554bb8bbc7b5703e64312ae0566..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/llnYou/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V3/183cd87c-2415-4428-9ad1-9d41c0dcdc41.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/llnYou_ECE-PRYMMAL-YL-3B-SLERP-V3/1762652580.326333", - "retrieved_timestamp": "1762652580.326334", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3", - "developer": "llnYou", - "inference_platform": "unknown", - "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35808100285021516 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5473121918055145 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43613541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40433843085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lmsys/lmsys_vicuna-13b-v1.3/5b0377fc-5df1-4ed0-bad4-ab13bc42677c.json b/leaderboard_data/HFOpenLLMv2/lmsys/lmsys_vicuna-13b-v1.3/5b0377fc-5df1-4ed0-bad4-ab13bc42677c.json deleted file mode 100644 index e389ddb8772c7ad0058c76029850515b6be499bd..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/lmsys/lmsys_vicuna-13b-v1.3/5b0377fc-5df1-4ed0-bad4-ab13bc42677c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lmsys_vicuna-13b-v1.3/1762652580.3265438", - "retrieved_timestamp": "1762652580.326545", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lmsys/vicuna-13b-v1.3", - "developer": "lmsys", - "inference_platform": "unknown", - "id": "lmsys/vicuna-13b-v1.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3343506340953115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3384399312777569 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3727291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243184840425532 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lmsys/lmsys_vicuna-7b-v1.3/b8e50988-f2c5-4508-a5c5-2813d94f7ebd.json b/leaderboard_data/HFOpenLLMv2/lmsys/lmsys_vicuna-7b-v1.3/b8e50988-f2c5-4508-a5c5-2813d94f7ebd.json deleted file mode 100644 index f23754b104e4ee863acfadf66b6e5be84e6a73d6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lmsys/lmsys_vicuna-7b-v1.3/b8e50988-f2c5-4508-a5c5-2813d94f7ebd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lmsys_vicuna-7b-v1.3/1762652580.326798", - "retrieved_timestamp": "1762652580.3267992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"lmsys/vicuna-7b-v1.3", - "developer": "lmsys", - "inference_platform": "unknown", - "id": "lmsys/vicuna-7b-v1.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29086158060612505 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3298410006592924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18375997340425532 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lmsys/lmsys_vicuna-7b-v1.5/26c5c07e-8482-44b4-8f11-a602e79fb730.json b/leaderboard_data/HFOpenLLMv2/lmsys/lmsys_vicuna-7b-v1.5/26c5c07e-8482-44b4-8f11-a602e79fb730.json deleted file mode 100644 index 3e212ae23ddbe8a3f83243cf32267270fd701833..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lmsys/lmsys_vicuna-7b-v1.5/26c5c07e-8482-44b4-8f11-a602e79fb730.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lmsys_vicuna-7b-v1.5/1762652580.327009", - "retrieved_timestamp": "1762652580.3270102", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lmsys/vicuna-7b-v1.5", - "developer": "lmsys", - "inference_platform": "unknown", - "id": "lmsys/vicuna-7b-v1.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23515716077784724 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39470436842233775 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42311458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21467752659574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lodrick-the-lafted/lodrick-the-lafted_llama-3.1-8b-instruct-ortho-v7/81d006e2-3be1-4941-bf85-74f1b313c7d7.json b/leaderboard_data/HFOpenLLMv2/lodrick-the-lafted/lodrick-the-lafted_llama-3.1-8b-instruct-ortho-v7/81d006e2-3be1-4941-bf85-74f1b313c7d7.json deleted file mode 100644 index 5275e4de3ba3ad2f38db9149c4f3f6396e63b3f1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lodrick-the-lafted/lodrick-the-lafted_llama-3.1-8b-instruct-ortho-v7/81d006e2-3be1-4941-bf85-74f1b313c7d7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lodrick-the-lafted_llama-3.1-8b-instruct-ortho-v7/1762652580.327225", - "retrieved_timestamp": "1762652580.3272262", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7", - "developer": "lodrick-the-lafted", - "inference_platform": "unknown", - "id": "lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3514618988727687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39069140261362917 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36159375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1973902925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lordjia/lordjia_Llama-3-Cantonese-8B-Instruct/f453cb41-346c-48b4-a660-64f13ec69fe4.json b/leaderboard_data/HFOpenLLMv2/lordjia/lordjia_Llama-3-Cantonese-8B-Instruct/f453cb41-346c-48b4-a660-64f13ec69fe4.json deleted file mode 100644 index 502d472c4372f4dcd5db06df4671314683a3d284..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lordjia/lordjia_Llama-3-Cantonese-8B-Instruct/f453cb41-346c-48b4-a660-64f13ec69fe4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lordjia_Llama-3-Cantonese-8B-Instruct/1762652580.3274932", - "retrieved_timestamp": "1762652580.3274932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lordjia/Llama-3-Cantonese-8B-Instruct", - "developer": "lordjia", - "inference_platform": "unknown", - "id": "lordjia/Llama-3-Cantonese-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6669259786256023 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4814148018954038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40460416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.35147938829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lordjia/lordjia_Qwen2-Cantonese-7B-Instruct/869339ec-939c-4222-b178-533c3ca5b0d1.json b/leaderboard_data/HFOpenLLMv2/lordjia/lordjia_Qwen2-Cantonese-7B-Instruct/869339ec-939c-4222-b178-533c3ca5b0d1.json deleted file mode 100644 index 29f706780730c81ecc5fa265626f3c023c640328..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lordjia/lordjia_Qwen2-Cantonese-7B-Instruct/869339ec-939c-4222-b178-533c3ca5b0d1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lordjia_Qwen2-Cantonese-7B-Instruct/1762652580.3277462", - "retrieved_timestamp": "1762652580.3277462", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lordjia/Qwen2-Cantonese-7B-Instruct", - "developer": "lordjia", - "inference_platform": "unknown", - "id": "lordjia/Qwen2-Cantonese-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5435278394659503 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5215311346221223 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25604229607250756 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40038541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38430851063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lt-asset/lt-asset_nova-1.3b/4c3005e9-fffd-491b-8ce1-58204986b787.json b/leaderboard_data/HFOpenLLMv2/lt-asset/lt-asset_nova-1.3b/4c3005e9-fffd-491b-8ce1-58204986b787.json deleted file mode 100644 index 18964697f51c8835d3cdc1c4603f4c9014a96b02..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/lt-asset/lt-asset_nova-1.3b/4c3005e9-fffd-491b-8ce1-58204986b787.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lt-asset_nova-1.3b/1762652580.3279538", - "retrieved_timestamp": "1762652580.327955", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lt-asset/nova-1.3b", - "developer": "lt-asset", - "inference_platform": "unknown", - "id": "lt-asset/nova-1.3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1214255951985177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31700122104895806 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36978125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11419547872340426 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "NovaForCausalLM", - "params_billions": 1.347 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lunahr/lunahr_thea-3b-50r-u1/977449d7-d8f0-4e32-b56c-8950006a09a4.json b/leaderboard_data/HFOpenLLMv2/lunahr/lunahr_thea-3b-50r-u1/977449d7-d8f0-4e32-b56c-8950006a09a4.json deleted file mode 100644 index 6878b1ae4fbb14fd9a58429858d0353c628dbc50..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lunahr/lunahr_thea-3b-50r-u1/977449d7-d8f0-4e32-b56c-8950006a09a4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lunahr_thea-3b-50r-u1/1762652580.328209", - "retrieved_timestamp": "1762652580.328209", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"lunahr/thea-3b-50r-u1", - "developer": "lunahr", - "inference_platform": "unknown", - "id": "lunahr/thea-3b-50r-u1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6030288523340293 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41046731029294475 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3181875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2808344414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/lunahr/lunahr_thea-v2-3b-50r/03d675d8-ee8d-47de-8bf3-ef386bd8a88f.json b/leaderboard_data/HFOpenLLMv2/lunahr/lunahr_thea-v2-3b-50r/03d675d8-ee8d-47de-8bf3-ef386bd8a88f.json deleted file mode 100644 index 781f5f5673166af4eeeaeaade4f62da700eb3afd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/lunahr/lunahr_thea-v2-3b-50r/03d675d8-ee8d-47de-8bf3-ef386bd8a88f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lunahr_thea-v2-3b-50r/1762652580.328458", - "retrieved_timestamp": "1762652580.328459", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lunahr/thea-v2-3b-50r", - "developer": "lunahr", - "inference_platform": "unknown", - "id": "lunahr/thea-v2-3b-50r" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.370396104558128 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4194416192911743 - } - }, - { 
- "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2409408244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/macadeliccc/macadeliccc_magistrate-3.2-3b-base/e0f596ba-89ee-4fa7-b5dc-698c2a5fda95.json b/leaderboard_data/HFOpenLLMv2/macadeliccc/macadeliccc_magistrate-3.2-3b-base/e0f596ba-89ee-4fa7-b5dc-698c2a5fda95.json deleted file mode 100644 index 28187aca6de4a81ade50435e1c335b0508884b80..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/macadeliccc/macadeliccc_magistrate-3.2-3b-base/e0f596ba-89ee-4fa7-b5dc-698c2a5fda95.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/macadeliccc_magistrate-3.2-3b-base/1762652580.32929", - "retrieved_timestamp": "1762652580.329291", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "macadeliccc/magistrate-3.2-3b-base", - "developer": "macadeliccc", - "inference_platform": "unknown", - "id": "macadeliccc/magistrate-3.2-3b-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1159301763764589 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3342701056047533 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39759374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16888297872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/macadeliccc/macadeliccc_magistrate-3.2-3b-it/df26db97-8e5e-409e-937d-45951c81a8cd.json b/leaderboard_data/HFOpenLLMv2/macadeliccc/macadeliccc_magistrate-3.2-3b-it/df26db97-8e5e-409e-937d-45951c81a8cd.json deleted file mode 100644 index 9b5a576d9b3020b462f74b48237425d81acc6e75..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/macadeliccc/macadeliccc_magistrate-3.2-3b-it/df26db97-8e5e-409e-937d-45951c81a8cd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/macadeliccc_magistrate-3.2-3b-it/1762652580.329552", - "retrieved_timestamp": "1762652580.329552", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "macadeliccc/magistrate-3.2-3b-it", - "developer": "macadeliccc", - "inference_platform": "unknown", - "id": "macadeliccc/magistrate-3.2-3b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22918744486850445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3256506790327196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3763229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15924202127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/maldv/maldv_Awqward2.5-32B-Instruct/8b330a87-7689-45ae-a005-0349e09f07ac.json b/leaderboard_data/HFOpenLLMv2/maldv/maldv_Awqward2.5-32B-Instruct/8b330a87-7689-45ae-a005-0349e09f07ac.json deleted file mode 100644 index e61e748387f256d3ba6e5154d12f1d14f22df55c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/maldv/maldv_Awqward2.5-32B-Instruct/8b330a87-7689-45ae-a005-0349e09f07ac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/maldv_Awqward2.5-32B-Instruct/1762652580.3302772", - "retrieved_timestamp": "1762652580.330278", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "maldv/Awqward2.5-32B-Instruct", - "developer": "maldv", - "inference_platform": "unknown", - "id": "maldv/Awqward2.5-32B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8254697535871487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6974465506773041 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6231117824773413 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42748958333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5723071808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/maldv/maldv_Lytta2.5-32B-Instruct/27575e22-2e66-4177-aa8f-ab4ebd4743ea.json b/leaderboard_data/HFOpenLLMv2/maldv/maldv_Lytta2.5-32B-Instruct/27575e22-2e66-4177-aa8f-ab4ebd4743ea.json deleted file mode 100644 index 9ca83a8e1081c3ea03d3f53c16c8c389bd541bf0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/maldv/maldv_Lytta2.5-32B-Instruct/27575e22-2e66-4177-aa8f-ab4ebd4743ea.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/maldv_Lytta2.5-32B-Instruct/1762652580.3306072", - "retrieved_timestamp": "1762652580.3306088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "maldv/Lytta2.5-32B-Instruct", - "developer": "maldv", - "inference_platform": "unknown", - "id": "maldv/Lytta2.5-32B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25079455843827714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.559971089357847 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34441087613293053 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37685416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048204787234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/maldv/maldv_Qwentile2.5-32B-Instruct/f4fde074-8a05-42ec-884c-447b4bfaba39.json b/leaderboard_data/HFOpenLLMv2/maldv/maldv_Qwentile2.5-32B-Instruct/f4fde074-8a05-42ec-884c-447b4bfaba39.json deleted file mode 100644 index 1441b4a4996395f9b807a131158d2ee2c719fdbc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/maldv/maldv_Qwentile2.5-32B-Instruct/f4fde074-8a05-42ec-884c-447b4bfaba39.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/maldv_Qwentile2.5-32B-Instruct/1762652580.3309162", - "retrieved_timestamp": "1762652580.3309171", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "maldv/Qwentile2.5-32B-Instruct", - "developer": "maldv", - "inference_platform": "unknown", - "id": 
"maldv/Qwentile2.5-32B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7393161256576994 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6962837451098368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5219033232628398 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4682291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5879321808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_Cheng-1/7aa1c718-9ac6-426b-be50-5c7f37849b90.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_Cheng-1/7aa1c718-9ac6-426b-be50-5c7f37849b90.json deleted file mode 100644 index 373f2f6a472554db896c5921a9b9331fd5ceab9f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_Cheng-1/7aa1c718-9ac6-426b-be50-5c7f37849b90.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Cheng-1/1762652580.332221", - "retrieved_timestamp": "1762652580.332222", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/Cheng-1", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/Cheng-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7788833628106757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5524677845280024 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48942598187311176 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4073333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43492353723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_Cheng-2-v1.1/a720e9bc-e8dd-4b7a-8d22-7b9f4b42ebe0.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_Cheng-2-v1.1/a720e9bc-e8dd-4b7a-8d22-7b9f4b42ebe0.json deleted file mode 100644 index 9427deddf916b085221d79a81d9d47dc3840b76f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_Cheng-2-v1.1/a720e9bc-e8dd-4b7a-8d22-7b9f4b42ebe0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Cheng-2-v1.1/1762652580.332704", - "retrieved_timestamp": "1762652580.332705", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/Cheng-2-v1.1", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/Cheng-2-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8269934883885868 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6510142192324059 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5392749244712991 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41672916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5076462765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_Cheng-2/dbadece3-665b-423b-b2d9-e74d7c676133.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_Cheng-2/dbadece3-665b-423b-b2d9-e74d7c676133.json deleted file mode 100644 index d11a281434e81ad79223f68df966d3f2f8963568..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_Cheng-2/dbadece3-665b-423b-b2d9-e74d7c676133.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_Cheng-2/1762652580.332486", - "retrieved_timestamp": "1762652580.3324869", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/Cheng-2", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/Cheng-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8337378156624423 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6498988582965893 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5438066465256798 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41933333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5013297872340425 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_absolute-o1-7b/4e9eef3d-b851-41de-a3b2-88950f1d426f.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_absolute-o1-7b/4e9eef3d-b851-41de-a3b2-88950f1d426f.json deleted file mode 100644 index 4c67e18b517461ef10419ba29ddf178c9cd7dfe8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_absolute-o1-7b/4e9eef3d-b851-41de-a3b2-88950f1d426f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_absolute-o1-7b/1762652580.335638", - "retrieved_timestamp": "1762652580.335639", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/absolute-o1-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/absolute-o1-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7515558717536137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5469413884153854 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5083081570996979 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4113645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44132313829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursa-o1-7b-2-28-2025/2a0bcf8c-cf70-4d13-a713-67054bc98412.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursa-o1-7b-2-28-2025/2a0bcf8c-cf70-4d13-a713-67054bc98412.json deleted file mode 100644 index 0f8ff93f6043acabd1a7d7512ced7ee4f82c2641..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursa-o1-7b-2-28-2025/2a0bcf8c-cf70-4d13-a713-67054bc98412.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b-2-28-2025/1762652580.3360791", - "retrieved_timestamp": "1762652580.3360798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/cursa-o1-7b-2-28-2025", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/cursa-o1-7b-2-28-2025" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7467098409996586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.538413713363387 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4811178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42733333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4365026595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursa-o1-7b-v1.1/f24a1f02-da21-49f0-91b9-65df4fd770db.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursa-o1-7b-v1.1/f24a1f02-da21-49f0-91b9-65df4fd770db.json deleted file mode 100644 index 718dbed43821e85112f812962dbb8f3d9ee80365..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursa-o1-7b-v1.1/f24a1f02-da21-49f0-91b9-65df4fd770db.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b-v1.1/1762652580.336299", - "retrieved_timestamp": "1762652580.3363001", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/cursa-o1-7b-v1.1", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/cursa-o1-7b-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527549125209998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5492557305346194 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43916223404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursa-o1-7b-v1.2-normalize-false/2632f42e-cbe3-4c55-b434-f4a239aeffa4.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursa-o1-7b-v1.2-normalize-false/2632f42e-cbe3-4c55-b434-f4a239aeffa4.json deleted file mode 100644 index ac1796aae479c26d9e155e72b8e0ce7f71075c1a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursa-o1-7b-v1.2-normalize-false/2632f42e-cbe3-4c55-b434-f4a239aeffa4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b-v1.2-normalize-false/1762652580.3365178", - "retrieved_timestamp": "1762652580.3365178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/cursa-o1-7b-v1.2-normalize-false", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/cursa-o1-7b-v1.2-normalize-false" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7615726272955757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5492349810703803 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49924471299093653 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4272708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4435671542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursa-o1-7b/0f7f339a-5523-4551-ba77-4fe34779d017.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursa-o1-7b/0f7f339a-5523-4551-ba77-4fe34779d017.json deleted file mode 100644 index 946d737133d606d36b3e98905b316c20b395cd93..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursa-o1-7b/0f7f339a-5523-4551-ba77-4fe34779d017.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursa-o1-7b/1762652580.335863", - "retrieved_timestamp": "1762652580.335863", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/cursa-o1-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/cursa-o1-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7628215357473725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5465860023973769 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4954682779456193 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4300625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4392453457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursor-o1-7b/764c4dcb-caea-418c-b206-ee401ea0d979.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursor-o1-7b/764c4dcb-caea-418c-b206-ee401ea0d979.json deleted file mode 100644 index 5f4c75f1b9b4b34c5244dad326b67d5d3d9b246d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursor-o1-7b/764c4dcb-caea-418c-b206-ee401ea0d979.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursor-o1-7b/1762652580.3367229", - "retrieved_timestamp": "1762652580.336724", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/cursor-o1-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/cursor-o1-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4106880853912065 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5007453242508472 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14123867069486404 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41009375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32513297872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursorr-o1.2-7b/51cd189c-82a8-4475-8df5-9a855394274a.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursorr-o1.2-7b/51cd189c-82a8-4475-8df5-9a855394274a.json deleted file mode 100644 index 5f8e6d97830123ab230d1a3feb8b96b399114ec4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_cursorr-o1.2-7b/51cd189c-82a8-4475-8df5-9a855394274a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_cursorr-o1.2-7b/1762652580.336929", - "retrieved_timestamp": "1762652580.336929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/cursorr-o1.2-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/cursorr-o1.2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1659895743294459 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3068134113454804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10804521276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_etr1o-explicit-v1.1/02fe0385-223e-4578-b3fb-d6819f783861.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_etr1o-explicit-v1.1/02fe0385-223e-4578-b3fb-d6819f783861.json deleted file mode 100644 index 647200d3b16c89f6d3e7117d1ca2e9c42c69dcb1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_etr1o-explicit-v1.1/02fe0385-223e-4578-b3fb-d6819f783861.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-explicit-v1.1/1762652580.337136", - "retrieved_timestamp": "1762652580.337137", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/etr1o-explicit-v1.1", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/etr1o-explicit-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28803906966847964 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31316553135589525 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4110520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11951462765957446 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_etr1o-explicit-v1.2/3ec5106d-86be-48a8-bb3d-6574b6971641.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_etr1o-explicit-v1.2/3ec5106d-86be-48a8-bb3d-6574b6971641.json deleted file mode 100644 index a6ad6a89db7d6570ffea2309312ec6750f0c7895..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_etr1o-explicit-v1.2/3ec5106d-86be-48a8-bb3d-6574b6971641.json +++ /dev/null 
@@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-explicit-v1.2/1762652580.337388", - "retrieved_timestamp": "1762652580.337389", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/etr1o-explicit-v1.2", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/etr1o-explicit-v1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1504020443534267 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29497368605886115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40311458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11261635638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_etr1o-v1.1/cd68d6d9-a5c7-4f32-b372-0e954af830ad.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_etr1o-v1.1/cd68d6d9-a5c7-4f32-b372-0e954af830ad.json deleted file mode 100644 index 8fcba36be4a0c4139dddf3fbd090a887cbb63f5b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_etr1o-v1.1/cd68d6d9-a5c7-4f32-b372-0e954af830ad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-v1.1/1762652580.3376079", - "retrieved_timestamp": "1762652580.337609", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"marcuscedricridia/etr1o-v1.1", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/etr1o-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15971954414287426 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31003625778742805 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40165625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11569148936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_etr1o-v1.2/81b5a281-9dc6-4ae5-8079-d0e308a20c8e.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_etr1o-v1.2/81b5a281-9dc6-4ae5-8079-d0e308a20c8e.json deleted file mode 100644 index ea1c66d2e38a21ce35d14303c988415e87cd649b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_etr1o-v1.2/81b5a281-9dc6-4ae5-8079-d0e308a20c8e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_etr1o-v1.2/1762652580.337824", - "retrieved_timestamp": "1762652580.337825", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/etr1o-v1.2", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/etr1o-v1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7286998497320443 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6349035922791185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35876132930513593 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4714479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5315824468085106 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_fan-o1-7b/9693b68f-ac5c-4111-804c-0505ec8bf06d.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_fan-o1-7b/9693b68f-ac5c-4111-804c-0505ec8bf06d.json deleted file mode 100644 index 30bdf8c39829ba66706899f1fd8881d24d515cad..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_fan-o1-7b/9693b68f-ac5c-4111-804c-0505ec8bf06d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_fan-o1-7b/1762652580.338023", - "retrieved_timestamp": "1762652580.338024", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/fan-o1-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/fan-o1-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4455588948434598 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4849058892394324 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16163141993957703 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3833645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3273769946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_olmner-7b/5064ebea-3ec3-4344-867f-e33f8937d096.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_olmner-7b/5064ebea-3ec3-4344-867f-e33f8937d096.json deleted file mode 100644 index 93e4d45763f4e2ee891c6e8ab4477ad0bba2f657..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_olmner-7b/5064ebea-3ec3-4344-867f-e33f8937d096.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-7b/1762652580.338225", - "retrieved_timestamp": "1762652580.338225", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/olmner-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/olmner-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7253775537795273 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5471591805569388 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4309341755319149 - } - } - ], 
- "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_olmner-della-7b/062e407e-7820-459f-83da-b670f8adff9d.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_olmner-della-7b/062e407e-7820-459f-83da-b670f8adff9d.json deleted file mode 100644 index e93c1e6ba889e2d6aa7287ce980bc6060803324e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_olmner-della-7b/062e407e-7820-459f-83da-b670f8adff9d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-della-7b/1762652580.338445", - "retrieved_timestamp": "1762652580.3384461", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/olmner-della-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/olmner-della-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7636958824807067 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5491231851969524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4207604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43858045212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_olmner-o1-7b/b1669ad9-450f-4a93-8094-26f427beb49f.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_olmner-o1-7b/b1669ad9-450f-4a93-8094-26f427beb49f.json deleted file mode 100644 index e4c5d49d780a91eb8941c6f90f7260ea99b6729d..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_olmner-o1-7b/b1669ad9-450f-4a93-8094-26f427beb49f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-o1-7b/1762652580.338658", - "retrieved_timestamp": "1762652580.338659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/olmner-o1-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/olmner-o1-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527549125209998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5480873056178129 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42990625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43858045212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_olmner-sbr-7b/afb014ed-a2e6-46b9-9ee9-a6a1f52e43cf.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_olmner-sbr-7b/afb014ed-a2e6-46b9-9ee9-a6a1f52e43cf.json deleted file mode 100644 index c504bf41a4f3cfd61cb32ea5f74093c378d78ea8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_olmner-sbr-7b/afb014ed-a2e6-46b9-9ee9-a6a1f52e43cf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_olmner-sbr-7b/1762652580.338864", - "retrieved_timestamp": "1762652580.3388648", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": 
{ - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/olmner-sbr-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/olmner-sbr-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7600488924941378 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5461642048146724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4947129909365559 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4153645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4412400265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_post-cursa-o1/c9632855-db4e-40bb-b140-2ff524d31fd2.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_post-cursa-o1/c9632855-db4e-40bb-b140-2ff524d31fd2.json deleted file mode 100644 index 3355eecf696746f9a52720e5ce936f5153f36fd9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_post-cursa-o1/c9632855-db4e-40bb-b140-2ff524d31fd2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_post-cursa-o1/1762652580.3390641", - "retrieved_timestamp": "1762652580.339065", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/post-cursa-o1", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/post-cursa-o1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7628215357473725 - } 
- }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5479692437233474 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4871601208459215 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43514583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4360871010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_pre-cursa-o1-v1.2/9db3b6b0-7cc8-48b6-85f5-1662cad07fae.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_pre-cursa-o1-v1.2/9db3b6b0-7cc8-48b6-85f5-1662cad07fae.json deleted file mode 100644 index b4a52d66a5dcd7b50df9aed14bb0ef947842d5c9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_pre-cursa-o1-v1.2/9db3b6b0-7cc8-48b6-85f5-1662cad07fae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.2/1762652580.339467", - "retrieved_timestamp": "1762652580.339468", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/pre-cursa-o1-v1.2", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/pre-cursa-o1-v1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7548781677061308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5486788313377599 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.506797583081571 - } 
- }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42723958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4402426861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_pre-cursa-o1-v1.3/f86cf126-4fb3-4419-82bf-e5c0168e25cb.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_pre-cursa-o1-v1.3/f86cf126-4fb3-4419-82bf-e5c0168e25cb.json deleted file mode 100644 index 8b5057118f6de11f63134e471d0ff1f35fed7ee3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_pre-cursa-o1-v1.3/f86cf126-4fb3-4419-82bf-e5c0168e25cb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.3/1762652580.339683", - "retrieved_timestamp": "1762652580.339684", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/pre-cursa-o1-v1.3", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/pre-cursa-o1-v1.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7506815250202795 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5454519705653261 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5075528700906344 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42714583333333334 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4419880319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_pre-cursa-o1-v1.4/4ed1f68a-6bc9-4621-beb1-3d274247cdb6.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_pre-cursa-o1-v1.4/4ed1f68a-6bc9-4621-beb1-3d274247cdb6.json deleted file mode 100644 index 5f3d81adcc403190c675be8ecbfcece3bfd2fcc6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_pre-cursa-o1-v1.4/4ed1f68a-6bc9-4621-beb1-3d274247cdb6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.4/1762652580.3398788", - "retrieved_timestamp": "1762652580.33988", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/pre-cursa-o1-v1.4", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/pre-cursa-o1-v1.4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.748783228500379 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5493014138981462 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48338368580060426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42851041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4435671542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_pre-cursa-o1-v1.6/50627b31-a8d4-401a-8449-5f33cfb17893.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_pre-cursa-o1-v1.6/50627b31-a8d4-401a-8449-5f33cfb17893.json deleted file mode 100644 index e3ebbc58c28986e1d8ae0c7c83f550144cf274dc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_pre-cursa-o1-v1.6/50627b31-a8d4-401a-8449-5f33cfb17893.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1-v1.6/1762652580.340074", - "retrieved_timestamp": "1762652580.340075", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/pre-cursa-o1-v1.6", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/pre-cursa-o1-v1.6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527549125209998 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5473342320067097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4233645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44132313829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_pre-cursa-o1/51fc3a16-67c2-448b-9854-07ab8adc4dea.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_pre-cursa-o1/51fc3a16-67c2-448b-9854-07ab8adc4dea.json deleted file mode 100644 index fcc9e10f4c4b75ba31df7820c77599f9d44ca981..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_pre-cursa-o1/51fc3a16-67c2-448b-9854-07ab8adc4dea.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/marcuscedricridia_pre-cursa-o1/1762652580.3392608", - "retrieved_timestamp": "1762652580.339262", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/pre-cursa-o1", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/pre-cursa-o1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.740889728143548 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5461688442794247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5037764350453172 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42596875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4424035904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_r1o-et/84de36db-b427-40c4-80f6-2114c8ad4e4f.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_r1o-et/84de36db-b427-40c4-80f6-2114c8ad4e4f.json deleted file mode 100644 index 3aeefb5bf4204dc133c72c0e55240bdfdaf6bf53..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_r1o-et/84de36db-b427-40c4-80f6-2114c8ad4e4f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_r1o-et/1762652580.340277", - "retrieved_timestamp": "1762652580.340277", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/r1o-et", - "developer": "marcuscedricridia", - "inference_platform": 
"unknown", - "id": "marcuscedricridia/r1o-et" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3596800932636516 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42092007019831174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3579375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2579787234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_sbr-o1-7b/05666c00-3b8c-48f3-9e36-bc9a116bb0c6.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_sbr-o1-7b/05666c00-3b8c-48f3-9e36-bc9a116bb0c6.json deleted file mode 100644 index 084281100e4de057b43b9d950e9bd84b3a83f044..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_sbr-o1-7b/05666c00-3b8c-48f3-9e36-bc9a116bb0c6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_sbr-o1-7b/1762652580.340477", - "retrieved_timestamp": "1762652580.340478", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/sbr-o1-7b", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/sbr-o1-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7454609325478618 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5478826565229475 - 
} - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4404166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43550531914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_stray-r1o-et/cbf68d01-b993-4bcd-b174-23e3b6e28d3a.json b/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_stray-r1o-et/cbf68d01-b993-4bcd-b174-23e3b6e28d3a.json deleted file mode 100644 index b087ed0874ff703728aaa2afa204b668ef850874..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/marcuscedricridia/marcuscedricridia_stray-r1o-et/cbf68d01-b993-4bcd-b174-23e3b6e28d3a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/marcuscedricridia_stray-r1o-et/1762652580.340682", - "retrieved_timestamp": "1762652580.340683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "marcuscedricridia/stray-r1o-et", - "developer": "marcuscedricridia", - "inference_platform": "unknown", - "id": "marcuscedricridia/stray-r1o-et" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15622215720953736 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2967459956151434 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4085729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.109375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/matouLeLoup/matouLeLoup_ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/4800a6d0-8458-405a-95ca-6d0690a8f769.json b/leaderboard_data/HFOpenLLMv2/matouLeLoup/matouLeLoup_ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/4800a6d0-8458-405a-95ca-6d0690a8f769.json deleted file mode 100644 index 88ab5e6e27ab8cc90461beba25f498184aae2330..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/matouLeLoup/matouLeLoup_ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/4800a6d0-8458-405a-95ca-6d0690a8f769.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3/1762652580.340896", - "retrieved_timestamp": "1762652580.340897", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3", - "developer": "matouLeLoup", - "inference_platform": "unknown", - "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18732186154957736 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3239117424825444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37520833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.17195811170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/matouLeLoup/matouLeLoup_ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/95c9ef47-8194-4c00-bbea-a65a7715f9f3.json b/leaderboard_data/HFOpenLLMv2/matouLeLoup/matouLeLoup_ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/95c9ef47-8194-4c00-bbea-a65a7715f9f3.json deleted file mode 100644 index 776ef5db45b10db903b7640ed6f86b407b9cee3e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/matouLeLoup/matouLeLoup_ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/95c9ef47-8194-4c00-bbea-a65a7715f9f3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis/1762652580.3411388", - "retrieved_timestamp": "1762652580.34114", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis", - "developer": "matouLeLoup", - "inference_platform": "unknown", - "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18732186154957736 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3239117424825444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37520833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17195811170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/matouLeLoup/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/b88d6df2-5642-4837-bf04-4d804a4ba3c4.json 
b/leaderboard_data/HFOpenLLMv2/matouLeLoup/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/b88d6df2-5642-4837-bf04-4d804a4ba3c4.json deleted file mode 100644 index ad9bbdc950cbf3da4848fa7c8445bff97ecb18da..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/matouLeLoup/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/b88d6df2-5642-4837-bf04-4d804a4ba3c4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis/1762652580.341354", - "retrieved_timestamp": "1762652580.341354", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis", - "developer": "matouLeLoup", - "inference_platform": "unknown", - "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18732186154957736 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3239117424825444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37520833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17195811170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/matouLeLoup/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/679f1499-572e-4f60-9b2d-4c8199d71107.json b/leaderboard_data/HFOpenLLMv2/matouLeLoup/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/679f1499-572e-4f60-9b2d-4c8199d71107.json deleted file mode 100644 index d3388eca34f3abe0ff379bcd272ed1d2724bca79..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/matouLeLoup/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/679f1499-572e-4f60-9b2d-4c8199d71107.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", 
- "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis/1762652580.341564", - "retrieved_timestamp": "1762652580.341565", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis", - "developer": "matouLeLoup", - "inference_platform": "unknown", - "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18824607596732226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32327887380902803 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17204122340425532 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/matouLeLoup/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/8da1b04b-c3a8-4554-bcb5-0e08dcfd7483.json b/leaderboard_data/HFOpenLLMv2/matouLeLoup/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/8da1b04b-c3a8-4554-bcb5-0e08dcfd7483.json deleted file mode 100644 index 453294a96a263ae78a812fe96c404eecd8b41eb9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/matouLeLoup/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/8da1b04b-c3a8-4554-bcb5-0e08dcfd7483.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/matouLeLoup_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/1762652580.3417778", - "retrieved_timestamp": "1762652580.341779", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - 
}, - "model_info": { - "name": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", - "developer": "matouLeLoup", - "inference_platform": "unknown", - "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16521496296493304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30237295164613204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42730208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1116190159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 0.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mattshumer/mattshumer_ref_70_e3/8ab597da-85ec-45d5-b5e2-f51ca8a2f3c9.json b/leaderboard_data/HFOpenLLMv2/mattshumer/mattshumer_ref_70_e3/8ab597da-85ec-45d5-b5e2-f51ca8a2f3c9.json deleted file mode 100644 index 083a79de46066222be68794804d85427e129deca..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mattshumer/mattshumer_ref_70_e3/8ab597da-85ec-45d5-b5e2-f51ca8a2f3c9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mattshumer_ref_70_e3/1762652580.342239", - "retrieved_timestamp": "1762652580.34224", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mattshumer/ref_70_e3", - "developer": "mattshumer", - "inference_platform": "unknown", - "id": "mattshumer/ref_70_e3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6294321289733462 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6500839481104265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2794561933534743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4327604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302526595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_Llama-3.2-SUN-1B-Instruct/f4c341cb-6489-49a1-9532-6b78c2238b2a.json b/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_Llama-3.2-SUN-1B-Instruct/f4c341cb-6489-49a1-9532-6b78c2238b2a.json deleted file mode 100644 index ca210f3d73a1ed716baa6a424f905c8d709b89a5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_Llama-3.2-SUN-1B-Instruct/f4c341cb-6489-49a1-9532-6b78c2238b2a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-1B-Instruct/1762652580.343025", - "retrieved_timestamp": "1762652580.343026", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meditsolutions/Llama-3.2-SUN-1B-Instruct", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": "meditsolutions/Llama-3.2-SUN-1B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6412973133507981 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34738999022447486 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35136458333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17810837765957446 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaMedITForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_Llama-3.2-SUN-1B-chat/7e72df4d-7a54-4e11-b4a2-44224db285ec.json b/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_Llama-3.2-SUN-1B-chat/7e72df4d-7a54-4e11-b4a2-44224db285ec.json deleted file mode 100644 index 670d1357ff544b0c7f0c8047a69a77c911681722..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_Llama-3.2-SUN-1B-chat/7e72df4d-7a54-4e11-b4a2-44224db285ec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-1B-chat/1762652580.343276", - "retrieved_timestamp": "1762652580.343277", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meditsolutions/Llama-3.2-SUN-1B-chat", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": "meditsolutions/Llama-3.2-SUN-1B-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5481743994822625 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35144575516411386 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3249166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18375997340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_Llama-3.2-SUN-2.5B-chat/7385392b-79e9-4764-9326-d7bc1586b918.json b/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_Llama-3.2-SUN-2.5B-chat/7385392b-79e9-4764-9326-d7bc1586b918.json deleted file mode 100644 index c42f944fd182656516d9bbacf75ab26509f014d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_Llama-3.2-SUN-2.5B-chat/7385392b-79e9-4764-9326-d7bc1586b918.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.5B-chat/1762652580.344106", - "retrieved_timestamp": "1762652580.344107", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meditsolutions/Llama-3.2-SUN-2.5B-chat", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": "meditsolutions/Llama-3.2-SUN-2.5B-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.560414145578177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3574734302161124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3155208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1813497340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.472 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_Llama-3.2-SUN-HDIC-1B-Instruct/ac6f2c5a-32b7-4553-acaa-e329f1916c85.json 
b/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_Llama-3.2-SUN-HDIC-1B-Instruct/ac6f2c5a-32b7-4553-acaa-e329f1916c85.json deleted file mode 100644 index 5b9c5c9b8d0ce0ddbcf12634ce6133147097a163..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_Llama-3.2-SUN-HDIC-1B-Instruct/ac6f2c5a-32b7-4553-acaa-e329f1916c85.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-HDIC-1B-Instruct/1762652580.344357", - "retrieved_timestamp": "1762652580.344363", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": "meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6826631116548536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3507731670753292 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23657718120805368 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3593645833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16871675531914893 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/ff57f4fa-eb78-4ef4-9d92-9f160a1b936a.json b/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/ff57f4fa-eb78-4ef4-9d92-9f160a1b936a.json deleted file mode 100644 index e5654b0a7b70e9b8ab144246d5899eb0a0127370..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/ff57f4fa-eb78-4ef4-9d92-9f160a1b936a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meditsolutions_MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune/1762652580.344661", - "retrieved_timestamp": "1762652580.344662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": "meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36550020611976225 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4034845834509661 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42534374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21899933510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.646 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/a7e4718c-c4cf-4c0f-b67f-fd12fa54e4ad.json b/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/a7e4718c-c4cf-4c0f-b67f-fd12fa54e4ad.json deleted file mode 100644 index 5d25118584ec4be927a34fa884de5e7cf29ba2ce..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/a7e4718c-c4cf-4c0f-b67f-fd12fa54e4ad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meditsolutions_MSH-v1-Bielik-v2.3-Instruct-MedIT-merge/1762652580.344883", - "retrieved_timestamp": "1762652580.344884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": "meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5814217387642566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5671722290858499 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20770392749244712 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43845833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3499833776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 11.169 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_MedIT-Mesh-3B-Instruct/89568570-298f-4dc5-9b7b-c9ce84d4010e.json b/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_MedIT-Mesh-3B-Instruct/89568570-298f-4dc5-9b7b-c9ce84d4010e.json deleted file mode 100644 index 410842fad662aee3614a615cbf81050042bbfd37..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_MedIT-Mesh-3B-Instruct/89568570-298f-4dc5-9b7b-c9ce84d4010e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meditsolutions_MedIT-Mesh-3B-Instruct/1762652580.345099", - "retrieved_timestamp": "1762652580.345099", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meditsolutions/MedIT-Mesh-3B-Instruct", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": "meditsolutions/MedIT-Mesh-3B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5814217387642566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5575523356865378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4047604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4011801861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_SmolLM2-MedIT-Upscale-2B/d78a23ac-c3f1-4ad5-bbd2-ea37faea455f.json b/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_SmolLM2-MedIT-Upscale-2B/d78a23ac-c3f1-4ad5-bbd2-ea37faea455f.json deleted file mode 100644 index c7a78fc1088bdb9bf90f14cda5b5b90ccbd19d25..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meditsolutions/meditsolutions_SmolLM2-MedIT-Upscale-2B/d78a23ac-c3f1-4ad5-bbd2-ea37faea455f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meditsolutions_SmolLM2-MedIT-Upscale-2B/1762652580.3453178", - "retrieved_timestamp": "1762652580.3453188", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meditsolutions/SmolLM2-MedIT-Upscale-2B", - "developer": "meditsolutions", - "inference_platform": "unknown", - "id": "meditsolutions/SmolLM2-MedIT-Upscale-2B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6429207835210575 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3551122445928012 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33136458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19705784574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.114 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meetkai/meetkai_functionary-small-v3.1/7312a4c6-85e2-4cb3-9c3e-1dfc039d1c3a.json b/leaderboard_data/HFOpenLLMv2/meetkai/meetkai_functionary-small-v3.1/7312a4c6-85e2-4cb3-9c3e-1dfc039d1c3a.json deleted file mode 100644 index 688a6031c01db078530708fb398049431fb32ee0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meetkai/meetkai_functionary-small-v3.1/7312a4c6-85e2-4cb3-9c3e-1dfc039d1c3a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meetkai_functionary-small-v3.1/1762652580.345532", - "retrieved_timestamp": "1762652580.345533", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meetkai/functionary-small-v3.1", - "developer": "meetkai", - "inference_platform": "unknown", - "id": "meetkai/functionary-small-v3.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6274584768414474 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4981781042779377 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15709969788519637 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3833645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33485704787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_JAJUKA-WEWILLNEVERFORGETYOU-3B/c948d98a-af63-43d6-a7c9-9ee61654a239.json b/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_JAJUKA-WEWILLNEVERFORGETYOU-3B/c948d98a-af63-43d6-a7c9-9ee61654a239.json deleted file mode 100644 index ed9d0eed81603721a75ed9076711dd47230e7d78..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_JAJUKA-WEWILLNEVERFORGETYOU-3B/c948d98a-af63-43d6-a7c9-9ee61654a239.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mergekit-community_JAJUKA-WEWILLNEVERFORGETYOU-3B/1762652580.346048", - "retrieved_timestamp": "1762652580.346048", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49406907006742107 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.436971949757697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36562500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3032746010638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_VirtuosoSmall-InstructModelStock/8c7e09ef-ac37-4765-9f1e-a1b17ff4b084.json b/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_VirtuosoSmall-InstructModelStock/8c7e09ef-ac37-4765-9f1e-a1b17ff4b084.json deleted file mode 100644 index 319f5b7f759bd76ed294595eb5e79b877d86aa88..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_VirtuosoSmall-InstructModelStock/8c7e09ef-ac37-4765-9f1e-a1b17ff4b084.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mergekit-community_VirtuosoSmall-InstructModelStock/1762652580.346572", - "retrieved_timestamp": "1762652580.346573", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mergekit-community/VirtuosoSmall-InstructModelStock", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/VirtuosoSmall-InstructModelStock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5237946426592552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6517899193567194 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4093655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4755729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5420545212765957 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_diabolic6045_ELN-AOC-CAIN/c87fbaff-133e-4312-87bf-d2fa397d66c4.json b/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_diabolic6045_ELN-AOC-CAIN/c87fbaff-133e-4312-87bf-d2fa397d66c4.json deleted file mode 100644 index 
57ae9775d668d0a9f7cf4bbc20ecb5d963e8df86..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_diabolic6045_ELN-AOC-CAIN/c87fbaff-133e-4312-87bf-d2fa397d66c4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mergekit-community_diabolic6045_ELN-AOC-CAIN/1762652580.346791", - "retrieved_timestamp": "1762652580.346791", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mergekit-community/diabolic6045_ELN-AOC-CAIN", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/diabolic6045_ELN-AOC-CAIN" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0861547361002141 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31256779393862577 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11909906914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-dare_ties-ajgjgea/69409961-b60d-4616-8a8e-8d0a9c6c966f.json b/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-dare_ties-ajgjgea/69409961-b60d-4616-8a8e-8d0a9c6c966f.json deleted file mode 100644 index a930941d5d9347189969364129c7ef8184394f1e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-dare_ties-ajgjgea/69409961-b60d-4616-8a8e-8d0a9c6c966f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-dare_ties-ajgjgea/1762652580.347229", - "retrieved_timestamp": "1762652580.34723", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mergekit-community/mergekit-dare_ties-ajgjgea", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/mergekit-dare_ties-ajgjgea" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5263423272472595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3494703687455365 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3289166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17436835106382978 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-della-zgowfmf/2989b505-bfe2-4ca6-9445-af450ad9bee3.json b/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-della-zgowfmf/2989b505-bfe2-4ca6-9445-af450ad9bee3.json deleted file mode 100644 index 0dc3db4668930d40b95e8bba9ef1e925944af057..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-della-zgowfmf/2989b505-bfe2-4ca6-9445-af450ad9bee3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-della-zgowfmf/1762652580.347496", - "retrieved_timestamp": "1762652580.347497", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mergekit-community/mergekit-della-zgowfmf", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": 
"mergekit-community/mergekit-della-zgowfmf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4827535383892516 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6590790528029254 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36178247734138974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3901006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4833854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414727393617021 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-model_stock-azgztvm/5a607a63-42bc-4f2b-af2f-4126234516d0.json b/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-model_stock-azgztvm/5a607a63-42bc-4f2b-af2f-4126234516d0.json deleted file mode 100644 index dc700904de5a2b82d40c2c82d3d22fe3ec3fb460..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-model_stock-azgztvm/5a607a63-42bc-4f2b-af2f-4126234516d0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-model_stock-azgztvm/1762652580.347734", - "retrieved_timestamp": "1762652580.347735", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mergekit-community/mergekit-model_stock-azgztvm", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/mergekit-model_stock-azgztvm" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5061592131101034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6542775546755846 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43731117824773413 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47300000000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5405585106382979 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-slerp-fmrazcr/5fd04483-684e-4991-adea-ca5496e05208.json b/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-slerp-fmrazcr/5fd04483-684e-4991-adea-ca5496e05208.json deleted file mode 100644 index 64b3b98195c35d15a9f65aa193e80fc7e340b7de..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-slerp-fmrazcr/5fd04483-684e-4991-adea-ca5496e05208.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-slerp-fmrazcr/1762652580.3479838", - "retrieved_timestamp": "1762652580.3479848", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mergekit-community/mergekit-slerp-fmrazcr", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/mergekit-slerp-fmrazcr" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41743241266506204 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5341624678276029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41045833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776595744680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-ties-rraxdhv/bb3ccfe9-1ae3-49ec-9305-9150edaf8527.json b/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-ties-rraxdhv/bb3ccfe9-1ae3-49ec-9305-9150edaf8527.json deleted file mode 100644 index a1de47f42941ba7d7d5148e62d485039c5343dc4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-ties-rraxdhv/bb3ccfe9-1ae3-49ec-9305-9150edaf8527.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-ties-rraxdhv/1762652580.348219", - "retrieved_timestamp": "1762652580.3482199", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mergekit-community/mergekit-ties-rraxdhv", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/mergekit-ties-rraxdhv" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11230756614671294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5183590984128971 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42019791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", 
- "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39095744680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-ties-ykqemwr/83a86bdd-4605-44a5-8168-ce88242c4ee6.json b/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-ties-ykqemwr/83a86bdd-4605-44a5-8168-ce88242c4ee6.json deleted file mode 100644 index d92dcb28081aa7245963e25cd519a9024e2a00ab..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_mergekit-ties-ykqemwr/83a86bdd-4605-44a5-8168-ce88242c4ee6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mergekit-community_mergekit-ties-ykqemwr/1762652580.3485382", - "retrieved_timestamp": "1762652580.3485398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mergekit-community/mergekit-ties-ykqemwr", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/mergekit-ties-ykqemwr" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35995491961329273 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5455496677885336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12235649546827794 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3734208776595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_sexeh_time_testing/79cd4642-8b10-416b-8a24-e3e3dc99b28f.json b/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_sexeh_time_testing/79cd4642-8b10-416b-8a24-e3e3dc99b28f.json deleted file mode 100644 index 8ea2aa0063ffaf59afb648978d2309e07abdad19..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mergekit-community/mergekit-community_sexeh_time_testing/79cd4642-8b10-416b-8a24-e3e3dc99b28f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mergekit-community_sexeh_time_testing/1762652580.348824", - "retrieved_timestamp": "1762652580.348825", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mergekit-community/sexeh_time_testing", - "developer": "mergekit-community", - "inference_platform": "unknown", - "id": "mergekit-community/sexeh_time_testing" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7329463601023063 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241321549202608 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36190625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36668882978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-2-13b-chat-hf/1d97c368-3e12-43d4-afb2-e3977bf7cf35.json b/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-2-13b-chat-hf/1d97c368-3e12-43d4-afb2-e3977bf7cf35.json deleted file mode 100644 index 5ddce8857673b86a0926077bef0a1eca9bea5138..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-2-13b-chat-hf/1d97c368-3e12-43d4-afb2-e3977bf7cf35.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-13b-chat-hf/1762652580.34908", - "retrieved_timestamp": "1762652580.349081", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Llama-2-13b-chat-hf", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Llama-2-13b-chat-hf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.398472719052115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33427367066714186 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23154362416107382 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40072916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19232047872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-2-70b-chat-hf/51411c24-49a4-48a7-9079-1f8c06e5318f.json b/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-2-70b-chat-hf/51411c24-49a4-48a7-9079-1f8c06e5318f.json deleted file mode 100644 index 15050cd0704b7cca23f6da346dadeedab1ea6c39..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-2-70b-chat-hf/51411c24-49a4-48a7-9079-1f8c06e5318f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-70b-chat-hf/1762652580.3497758", - "retrieved_timestamp": "1762652580.349777", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Llama-2-70b-chat-hf", - "developer": "meta-llama", - 
"inference_platform": "unknown", - "id": "meta-llama/Llama-2-70b-chat-hf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49579227560650185 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30424741461642657 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2432679521276596 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 68.977 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-2-7b-chat-hf/3c870b5c-ab3f-4a21-836a-655d0e30efb9.json b/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-2-7b-chat-hf/3c870b5c-ab3f-4a21-836a-655d0e30efb9.json deleted file mode 100644 index 00ec9e8f8578e1a38b55d033ec71be84f7230e6e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-2-7b-chat-hf/3c870b5c-ab3f-4a21-836a-655d0e30efb9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-7b-chat-hf/1762652580.350235", - "retrieved_timestamp": "1762652580.350236", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Llama-2-7b-chat-hf", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Llama-2-7b-chat-hf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3986478100329348 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.3113546355002185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3675520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16879986702127658 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-3.1-70B-Instruct/5623efdd-2f43-49d3-9e89-21432db474f4.json b/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-3.1-70B-Instruct/5623efdd-2f43-49d3-9e89-21432db474f4.json deleted file mode 100644 index db2fc26c5e70d4b3b20c8ea7352a77180f9e1065..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-3.1-70B-Instruct/5623efdd-2f43-49d3-9e89-21432db474f4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-70B-Instruct/1762652580.35089", - "retrieved_timestamp": "1762652580.350891", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Llama-3.1-70B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.1-70B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8668854195756149 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6917287453663654 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806646525679758 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, 
- { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45806250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5309175531914894 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-3.1-8B-Instruct/b5009142-e716-45b2-877e-9259a3a705da.json b/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-3.1-8B-Instruct/b5009142-e716-45b2-877e-9259a3a705da.json deleted file mode 100644 index ca24622230d8310b43141e27029bf32256feb507..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-3.1-8B-Instruct/b5009142-e716-45b2-877e-9259a3a705da.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-8B-Instruct/1762652580.351296", - "retrieved_timestamp": "1762652580.3512971", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Llama-3.1-8B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.1-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4921707735475206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5087032184331889 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555891238670695 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39715625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37982047872340424 - } - } - ], - "additional_details": { - "precision": 
"float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-3.2-1B-Instruct/b21f94af-3dfd-42f6-a380-3c5faebc90d8.json b/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-3.2-1B-Instruct/b21f94af-3dfd-42f6-a380-3c5faebc90d8.json deleted file mode 100644 index 1c9da221832a8336ca48434c13a26d0319362b3a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-3.2-1B-Instruct/b21f94af-3dfd-42f6-a380-3c5faebc90d8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-1B-Instruct/1762652580.351711", - "retrieved_timestamp": "1762652580.351712", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Llama-3.2-1B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.2-1B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5698313807364459 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34968498061768266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3328541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16821808510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.24 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-3.2-3B-Instruct/ec976588-9788-45e0-ae89-4682e3c8799a.json b/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-3.2-3B-Instruct/ec976588-9788-45e0-ae89-4682e3c8799a.json deleted file mode 100644 index 6e2e498382cb6b3ad1445f56ca18fac7badf51b3..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-3.2-3B-Instruct/ec976588-9788-45e0-ae89-4682e3c8799a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-3B-Instruct/1762652580.352124", - "retrieved_timestamp": "1762652580.352124", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Llama-3.2-3B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.2-3B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7393161256576994 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4610070239466069 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3194813829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-3.3-70B-Instruct/b227d987-1bec-4124-955a-d81e2e2a52f6.json b/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-3.3-70B-Instruct/b227d987-1bec-4124-955a-d81e2e2a52f6.json deleted file mode 100644 index 24d04574571cc53abeb9b1d1b2d045c659a5fc90..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Llama-3.3-70B-Instruct/b227d987-1bec-4124-955a-d81e2e2a52f6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.3-70B-Instruct/1762652580.352333", - "retrieved_timestamp": "1762652580.352334", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Llama-3.3-70B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.3-70B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8997581971391464 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6919312828325811 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48338368580060426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44612500000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5331615691489362 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Meta-Llama-3-70B-Instruct/5a0ae810-10a3-4497-a81c-a88d2106a5ba.json b/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Meta-Llama-3-70B-Instruct/5a0ae810-10a3-4497-a81c-a88d2106a5ba.json deleted file mode 100644 index 45811e39db08eee2e25922b6fe6c873653e4b8f0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Meta-Llama-3-70B-Instruct/5a0ae810-10a3-4497-a81c-a88d2106a5ba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-70B-Instruct/1762652580.352748", - "retrieved_timestamp": "1762652580.352749", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Meta-Llama-3-70B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Meta-Llama-3-70B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8099077115387172 - } - 
}, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6546699432372051 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24471299093655588 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4153645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206948138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Meta-Llama-3-8B-Instruct/108befbc-f9a6-4d5f-9bcf-30fe7cebe35b.json b/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Meta-Llama-3-8B-Instruct/108befbc-f9a6-4d5f-9bcf-30fe7cebe35b.json deleted file mode 100644 index 991fe36ef3be18a66afdf7ed17ef07310d61c199..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Meta-Llama-3-8B-Instruct/108befbc-f9a6-4d5f-9bcf-30fe7cebe35b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-8B-Instruct/1762652580.353369", - "retrieved_timestamp": "1762652580.353369", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Meta-Llama-3-8B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Meta-Llama-3-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47823220166934843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4910264175128683 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667674 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3805416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.359125664893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Meta-Llama-3-8B-Instruct/df2fd3a3-33d0-4ee8-be73-e8d3e00e8184.json b/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Meta-Llama-3-8B-Instruct/df2fd3a3-33d0-4ee8-be73-e8d3e00e8184.json deleted file mode 100644 index da6a7c252e12759c698001a9ff4e7dcf818a0966..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta-llama/meta-llama_Meta-Llama-3-8B-Instruct/df2fd3a3-33d0-4ee8-be73-e8d3e00e8184.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-8B-Instruct/1762652580.353163", - "retrieved_timestamp": "1762652580.353164", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Meta-Llama-3-8B-Instruct", - "developer": "meta-llama", - "inference_platform": "unknown", - "id": "meta-llama/Meta-Llama-3-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7408398604591373 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49887111136169526 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3568229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3664394946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/3rd-Degree-Burn_Llama-3.1-8B-Squareroot-v1/0851ad0a-7f87-48c8-943a-198ad2ef8ea3.json b/leaderboard_data/HFOpenLLMv2/meta/3rd-Degree-Burn_Llama-3.1-8B-Squareroot-v1/0851ad0a-7f87-48c8-943a-198ad2ef8ea3.json deleted file mode 100644 index 7107f8a8cd2f0eb8ef5fcaf4519fc291e961327e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/3rd-Degree-Burn_Llama-3.1-8B-Squareroot-v1/0851ad0a-7f87-48c8-943a-198ad2ef8ea3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_Llama-3.1-8B-Squareroot-v1/1762652579.470921", - "retrieved_timestamp": "1762652579.470922", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1", - "developer": "meta", - "inference_platform": "unknown", - "id": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2892381104358657 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33427703119251256 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08836858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3340625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/3rd-Degree-Burn_Llama-3.1-8B-Squareroot/cbe8101a-f057-4151-9391-dbd883f4c09e.json 
b/leaderboard_data/HFOpenLLMv2/meta/3rd-Degree-Burn_Llama-3.1-8B-Squareroot/cbe8101a-f057-4151-9391-dbd883f4c09e.json deleted file mode 100644 index 6a1e9e80c6c524f593673a25cf654fdb2158a311..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/3rd-Degree-Burn_Llama-3.1-8B-Squareroot/cbe8101a-f057-4151-9391-dbd883f4c09e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_Llama-3.1-8B-Squareroot/1762652579.47045", - "retrieved_timestamp": "1762652579.4704509", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot", - "developer": "meta", - "inference_platform": "unknown", - "id": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22134381219608418 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34609423326328875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26586102719033233 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3089166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17495013297872342 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/3rd-Degree-Burn_Llama-Squared-8B/fae2328b-af2f-49ff-a817-9406cf40c3d0.json b/leaderboard_data/HFOpenLLMv2/meta/3rd-Degree-Burn_Llama-Squared-8B/fae2328b-af2f-49ff-a817-9406cf40c3d0.json deleted file mode 100644 index d1a9df3050caee0fbcd26fc10b9b85316dec096c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/3rd-Degree-Burn_Llama-Squared-8B/fae2328b-af2f-49ff-a817-9406cf40c3d0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_Llama-Squared-8B/1762652579.471144", - "retrieved_timestamp": "1762652579.471145", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "3rd-Degree-Burn/Llama-Squared-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "3rd-Degree-Burn/Llama-Squared-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27552449722292405 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4431025683868353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30894791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2366190159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/AGI-0_Artificium-llama3.1-8B-001/2e3e8be1-725f-4662-a8b1-da4437018e31.json b/leaderboard_data/HFOpenLLMv2/meta/AGI-0_Artificium-llama3.1-8B-001/2e3e8be1-725f-4662-a8b1-da4437018e31.json deleted file mode 100644 index 213ce77b152cf3d6922ea33b229577903f4821a3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/AGI-0_Artificium-llama3.1-8B-001/2e3e8be1-725f-4662-a8b1-da4437018e31.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AGI-0_Artificium-llama3.1-8B-001/1762652579.4738402", - "retrieved_timestamp": "1762652579.473841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AGI-0/Artificium-llama3.1-8B-001", - "developer": "meta", - "inference_platform": "unknown", - "id": "AGI-0/Artificium-llama3.1-8B-001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5247687247614108 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42562150225923556 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13595166163141995 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3181515957446808 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/AGI-0_smartllama3.1-8B-001/c97c2d67-79d5-4813-8569-64eaefe66f89.json b/leaderboard_data/HFOpenLLMv2/meta/AGI-0_smartllama3.1-8B-001/c97c2d67-79d5-4813-8569-64eaefe66f89.json deleted file mode 100644 index 3af95a3ed9357ef94ed8f6e6404250e5d2f55894..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/AGI-0_smartllama3.1-8B-001/c97c2d67-79d5-4813-8569-64eaefe66f89.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AGI-0_smartllama3.1-8B-001/1762652579.4741051", - "retrieved_timestamp": "1762652579.474106", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AGI-0/smartllama3.1-8B-001", - "developer": "meta", - "inference_platform": "unknown", - "id": "AGI-0/smartllama3.1-8B-001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35178659290682057 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46701787510868176 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43864583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486535904255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/ArliAI_Llama-3.1-8B-ArliAI-RPMax-v1.1/1d33cf05-9690-41ba-9288-5f39e5b3c17d.json b/leaderboard_data/HFOpenLLMv2/meta/ArliAI_Llama-3.1-8B-ArliAI-RPMax-v1.1/1d33cf05-9690-41ba-9288-5f39e5b3c17d.json deleted file mode 100644 index b7cc0d1bb56434c599b60f61a1091a1ff9789c5f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/ArliAI_Llama-3.1-8B-ArliAI-RPMax-v1.1/1d33cf05-9690-41ba-9288-5f39e5b3c17d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ArliAI_Llama-3.1-8B-ArliAI-RPMax-v1.1/1762652579.4817438", - "retrieved_timestamp": "1762652579.481745", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6359016298975606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5015613456039083 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3576875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35513630319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Azure99_blossom-v5-llama3-8b/19a6e24f-819e-480f-a15f-90273a0a06c5.json b/leaderboard_data/HFOpenLLMv2/meta/Azure99_blossom-v5-llama3-8b/19a6e24f-819e-480f-a15f-90273a0a06c5.json deleted file mode 100644 index 1333fff9fcbfa9ffcbd2a3aba7516b485ffa9f88..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Azure99_blossom-v5-llama3-8b/19a6e24f-819e-480f-a15f-90273a0a06c5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5-llama3-8b/1762652579.486878", - "retrieved_timestamp": "1762652579.486878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Azure99/blossom-v5-llama3-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Azure99/blossom-v5-llama3-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.434293230849701 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4184909197087261 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36702083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2205784574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/BEE-spoke-data_Meta-Llama-3-8Bee/ae5f1f84-091a-4f80-ae40-92ada7e04f94.json 
b/leaderboard_data/HFOpenLLMv2/meta/BEE-spoke-data_Meta-Llama-3-8Bee/ae5f1f84-091a-4f80-ae40-92ada7e04f94.json deleted file mode 100644 index 915c1632570c150123904eb7eef6e96def3a402a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/BEE-spoke-data_Meta-Llama-3-8Bee/ae5f1f84-091a-4f80-ae40-92ada7e04f94.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_Meta-Llama-3-8Bee/1762652579.491223", - "retrieved_timestamp": "1762652579.491224", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BEE-spoke-data/Meta-Llama-3-8Bee", - "developer": "meta", - "inference_platform": "unknown", - "id": "BEE-spoke-data/Meta-Llama-3-8Bee" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19506575885317623 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46263641905752745 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32197473404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/BEE-spoke-data_smol_llama-101M-GQA/3c1f129b-4f54-4187-876b-c93942179125.json b/leaderboard_data/HFOpenLLMv2/meta/BEE-spoke-data_smol_llama-101M-GQA/3c1f129b-4f54-4187-876b-c93942179125.json deleted file mode 100644 index c29feed8cfac43eed053c761342ee03828be906e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/BEE-spoke-data_smol_llama-101M-GQA/3c1f129b-4f54-4187-876b-c93942179125.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-101M-GQA/1762652579.491745", - "retrieved_timestamp": "1762652579.491746", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BEE-spoke-data/smol_llama-101M-GQA", - "developer": "meta", - "inference_platform": "unknown", - "id": "BEE-spoke-data/smol_llama-101M-GQA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13843712460715346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3017560771912554 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3712708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11070478723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.101 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/BEE-spoke-data_smol_llama-220M-GQA-fineweb_edu/03c78dad-b50d-4f80-91f8-bd8fbb87235d.json b/leaderboard_data/HFOpenLLMv2/meta/BEE-spoke-data_smol_llama-220M-GQA-fineweb_edu/03c78dad-b50d-4f80-91f8-bd8fbb87235d.json deleted file mode 100644 index b095d6fba7d3533f08c5c6cd94c16e02424f75aa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/BEE-spoke-data_smol_llama-220M-GQA-fineweb_edu/03c78dad-b50d-4f80-91f8-bd8fbb87235d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-220M-GQA-fineweb_edu/1762652579.492168", - "retrieved_timestamp": "1762652579.492168", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu", - "developer": "meta", - "inference_platform": "unknown", - "id": "BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19881248420856662 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29290517164510593 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4367604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.218 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/BEE-spoke-data_smol_llama-220M-GQA/26596bba-b99d-417f-87be-91de8fa528d3.json b/leaderboard_data/HFOpenLLMv2/meta/BEE-spoke-data_smol_llama-220M-GQA/26596bba-b99d-417f-87be-91de8fa528d3.json deleted file mode 100644 index 829fc10922adf7e10a9f2cafd67b60ed29ee4897..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/BEE-spoke-data_smol_llama-220M-GQA/26596bba-b99d-417f-87be-91de8fa528d3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-220M-GQA/1762652579.491959", - "retrieved_timestamp": "1762652579.49196", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BEE-spoke-data/smol_llama-220M-GQA", - "developer": "meta", - "inference_platform": "unknown", - "id": "BEE-spoke-data/smol_llama-220M-GQA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23860468002677343 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30316731388708956 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.405875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1149434840425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.218 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/BEE-spoke-data_smol_llama-220M-openhermes/a0de28f1-8186-4eef-b5b4-ce6da71d8271.json b/leaderboard_data/HFOpenLLMv2/meta/BEE-spoke-data_smol_llama-220M-openhermes/a0de28f1-8186-4eef-b5b4-ce6da71d8271.json deleted file mode 100644 index e5e37fc1b115987415cd265b726052c8a701b0d0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/BEE-spoke-data_smol_llama-220M-openhermes/a0de28f1-8186-4eef-b5b4-ce6da71d8271.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BEE-spoke-data_smol_llama-220M-openhermes/1762652579.4923809", - "retrieved_timestamp": "1762652579.492382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BEE-spoke-data/smol_llama-220M-openhermes", - "developer": "meta", - "inference_platform": "unknown", - "id": "BEE-spoke-data/smol_llama-220M-openhermes" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555229014570229 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30275191401927726 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3847291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11203457446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.218 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Ba2han_Llama-Phi-3_DoRA/99c4e277-7a0f-4c0c-ac19-25fe6b706a4a.json b/leaderboard_data/HFOpenLLMv2/meta/Ba2han_Llama-Phi-3_DoRA/99c4e277-7a0f-4c0c-ac19-25fe6b706a4a.json deleted file mode 100644 index 16b631a60d0386c2df1d1397b44554b4c38671b0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Ba2han_Llama-Phi-3_DoRA/99c4e277-7a0f-4c0c-ac19-25fe6b706a4a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Ba2han_Llama-Phi-3_DoRA/1762652579.4940102", - "retrieved_timestamp": "1762652579.494011", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Ba2han/Llama-Phi-3_DoRA", - "developer": "meta", - "inference_platform": "unknown", - "id": "Ba2han/Llama-Phi-3_DoRA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5130531434371911 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5514558259029191 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40692708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39153922872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/meta/BlackBeenie_Llama-3.1-8B-pythonic-passthrough-merge/f852dab4-9c5a-4fb9-99c2-951e7d2300d0.json b/leaderboard_data/HFOpenLLMv2/meta/BlackBeenie_Llama-3.1-8B-pythonic-passthrough-merge/f852dab4-9c5a-4fb9-99c2-951e7d2300d0.json deleted file mode 100644 index 9444d4b99b7c71b6714a3ca7dcb197faf9338e01..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/BlackBeenie_Llama-3.1-8B-pythonic-passthrough-merge/f852dab4-9c5a-4fb9-99c2-951e7d2300d0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BlackBeenie_Llama-3.1-8B-pythonic-passthrough-merge/1762652579.495604", - "retrieved_timestamp": "1762652579.495605", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": "BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23158552640327662 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3453848032699584 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37781249999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1332280585106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 20.245 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/BlackBeenie_Neos-Llama-3.1-8B/904e3917-3bfd-4c83-8088-6b5ac596e7ea.json b/leaderboard_data/HFOpenLLMv2/meta/BlackBeenie_Neos-Llama-3.1-8B/904e3917-3bfd-4c83-8088-6b5ac596e7ea.json deleted file mode 100644 index 996a3766d3ed2ad6ca405628cff77947afbb8959..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/BlackBeenie_Neos-Llama-3.1-8B/904e3917-3bfd-4c83-8088-6b5ac596e7ea.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BlackBeenie_Neos-Llama-3.1-8B/1762652579.496156", - "retrieved_timestamp": "1762652579.496157", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BlackBeenie/Neos-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "BlackBeenie/Neos-Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49439376410147295 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4424998411442879 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3749895833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32621343085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/BlackBeenie_Neos-Llama-3.1-base/ec9c46a6-a0e9-4174-8ebe-ce33d5eeb27d.json b/leaderboard_data/HFOpenLLMv2/meta/BlackBeenie_Neos-Llama-3.1-base/ec9c46a6-a0e9-4174-8ebe-ce33d5eeb27d.json deleted file mode 100644 index 92ba311157f03d9e56b4636d82c5aff8edd3f71c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/BlackBeenie_Neos-Llama-3.1-base/ec9c46a6-a0e9-4174-8ebe-ce33d5eeb27d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BlackBeenie_Neos-Llama-3.1-base/1762652579.496382", - "retrieved_timestamp": "1762652579.496383", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BlackBeenie/Neos-Llama-3.1-base", - "developer": "meta", - "inference_platform": 
"unknown", - "id": "BlackBeenie/Neos-Llama-3.1-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17508211545366295 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29303397468240516 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23741610738255034 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34990625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11120345744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.65 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/BlackBeenie_llama-3-luminous-merged/9ca4809e-2bf0-477e-b960-64718561583b.json b/leaderboard_data/HFOpenLLMv2/meta/BlackBeenie_llama-3-luminous-merged/9ca4809e-2bf0-477e-b960-64718561583b.json deleted file mode 100644 index e09f3f47fbe18b912d9866d5bcb60333afbbd554..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/BlackBeenie_llama-3-luminous-merged/9ca4809e-2bf0-477e-b960-64718561583b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BlackBeenie_llama-3-luminous-merged/1762652579.496879", - "retrieved_timestamp": "1762652579.49688", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BlackBeenie/llama-3-luminous-merged", - "developer": "meta", - "inference_platform": "unknown", - "id": "BlackBeenie/llama-3-luminous-merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43234506664538974 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5153924501559338 - } 
- }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4148958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3773271276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/BlackBeenie_llama-3.1-8B-Galore-openassistant-guanaco/7f8d4c8c-4877-4b2f-a0fe-7817894daa79.json b/leaderboard_data/HFOpenLLMv2/meta/BlackBeenie_llama-3.1-8B-Galore-openassistant-guanaco/7f8d4c8c-4877-4b2f-a0fe-7817894daa79.json deleted file mode 100644 index 2a9086614cb1c4cefb23721398e9fbcf1912462a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/BlackBeenie_llama-3.1-8B-Galore-openassistant-guanaco/7f8d4c8c-4877-4b2f-a0fe-7817894daa79.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BlackBeenie_llama-3.1-8B-Galore-openassistant-guanaco/1762652579.4970949", - "retrieved_timestamp": "1762652579.4970958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco", - "developer": "meta", - "inference_platform": "unknown", - "id": "BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634842218646525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5213365363748029 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44062500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32064494680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Bllossom_llama-3.2-Korean-Bllossom-AICA-5B/e2668c3c-a862-4564-acee-3c3ce439f74f.json b/leaderboard_data/HFOpenLLMv2/meta/Bllossom_llama-3.2-Korean-Bllossom-AICA-5B/e2668c3c-a862-4564-acee-3c3ce439f74f.json deleted file mode 100644 index 060ac7d5bdf875e64882a925df2f82ab8b7998df..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Bllossom_llama-3.2-Korean-Bllossom-AICA-5B/e2668c3c-a862-4564-acee-3c3ce439f74f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Bllossom_llama-3.2-Korean-Bllossom-AICA-5B/1762652579.497314", - "retrieved_timestamp": "1762652579.497314", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Bllossom/llama-3.2-Korean-Bllossom-AICA-5B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Bllossom/llama-3.2-Korean-Bllossom-AICA-5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5172497861230424 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42930745041520607 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3833958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.27102726063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MllamaForConditionalGeneration", - "params_billions": 5.199 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/BrainWave-ML_llama3.2-3B-maths-orpo/979ef5b7-12cb-4e4d-81c7-9e6fcb1d6cef.json b/leaderboard_data/HFOpenLLMv2/meta/BrainWave-ML_llama3.2-3B-maths-orpo/979ef5b7-12cb-4e4d-81c7-9e6fcb1d6cef.json deleted file mode 100644 index dcc96dc5ce6efe40ee66ea43e65c483ea37fdace..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/BrainWave-ML_llama3.2-3B-maths-orpo/979ef5b7-12cb-4e4d-81c7-9e6fcb1d6cef.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/BrainWave-ML_llama3.2-3B-maths-orpo/1762652579.499409", - "retrieved_timestamp": "1762652579.49941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BrainWave-ML/llama3.2-3B-maths-orpo", - "developer": "meta", - "inference_platform": "unknown", - "id": "BrainWave-ML/llama3.2-3B-maths-orpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20490742341431845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11677194148936171 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/CYFRAGOVPL_Llama-PLLuM-8B-base/01484796-f32b-43fe-b865-517b1a5c0b10.json b/leaderboard_data/HFOpenLLMv2/meta/CYFRAGOVPL_Llama-PLLuM-8B-base/01484796-f32b-43fe-b865-517b1a5c0b10.json deleted file mode 100644 index f16b88993d4e0bafe0ba69da67a214dc2c675b5d..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/CYFRAGOVPL_Llama-PLLuM-8B-base/01484796-f32b-43fe-b865-517b1a5c0b10.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_Llama-PLLuM-8B-base/1762652579.500559", - "retrieved_timestamp": "1762652579.5005598", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CYFRAGOVPL/Llama-PLLuM-8B-base", - "developer": "meta", - "inference_platform": "unknown", - "id": "CYFRAGOVPL/Llama-PLLuM-8B-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28988749850396944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43204480458140976 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39703125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27568151595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Columbia-NLP_LION-LLaMA-3-8b-odpo-v1.0/c256cede-47bb-487d-9de2-ae7352faa165.json b/leaderboard_data/HFOpenLLMv2/meta/Columbia-NLP_LION-LLaMA-3-8b-odpo-v1.0/c256cede-47bb-487d-9de2-ae7352faa165.json deleted file mode 100644 index 7d97c70f3f8d4b56ec1791f3acb70859be042db1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Columbia-NLP_LION-LLaMA-3-8b-odpo-v1.0/c256cede-47bb-487d-9de2-ae7352faa165.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-LLaMA-3-8b-odpo-v1.0/1762652579.5080209", - "retrieved_timestamp": "1762652579.508022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39679938119744496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5023929881802022 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3152426861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/ContactDoctor_Bio-Medical-Llama-3-8B/42a3e3b7-b8e3-4470-b1a6-4a3daa146484.json b/leaderboard_data/HFOpenLLMv2/meta/ContactDoctor_Bio-Medical-Llama-3-8B/42a3e3b7-b8e3-4470-b1a6-4a3daa146484.json deleted file mode 100644 index 612bec596175d64318c3675188aae6cb37e20f33..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/ContactDoctor_Bio-Medical-Llama-3-8B/42a3e3b7-b8e3-4470-b1a6-4a3daa146484.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ContactDoctor_Bio-Medical-Llama-3-8B/1762652579.510189", - "retrieved_timestamp": "1762652579.510189", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ContactDoctor/Bio-Medical-Llama-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "ContactDoctor/Bio-Medical-Llama-3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4422365988909427 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.486311802622738 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35139583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36477726063829785 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Corianas_llama-3-reactor/0670ba93-c3d6-4a74-94e4-4a77311d4984.json b/leaderboard_data/HFOpenLLMv2/meta/Corianas_llama-3-reactor/0670ba93-c3d6-4a74-94e4-4a77311d4984.json deleted file mode 100644 index a5af340b3211ddbd3c287a01dde7598fc84f8d57..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Corianas_llama-3-reactor/0670ba93-c3d6-4a74-94e4-4a77311d4984.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Corianas_llama-3-reactor/1762652579.5122728", - "retrieved_timestamp": "1762652579.512274", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Corianas/llama-3-reactor", - "developer": "meta", - "inference_platform": "unknown", - "id": "Corianas/llama-3-reactor" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23001192391742797 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4457148560545015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39771874999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2800864361702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": -1.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/CreitinGameplays_Llama-3.1-8B-R1-v0.1/a4b935d4-1664-44e4-ad82-639755c2b909.json b/leaderboard_data/HFOpenLLMv2/meta/CreitinGameplays_Llama-3.1-8B-R1-v0.1/a4b935d4-1664-44e4-ad82-639755c2b909.json deleted file mode 100644 index 6d34aea385d26f240c30eeb5abc7a8b97bceed7a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/CreitinGameplays_Llama-3.1-8B-R1-v0.1/a4b935d4-1664-44e4-ad82-639755c2b909.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/CreitinGameplays_Llama-3.1-8B-R1-v0.1/1762652579.514677", - "retrieved_timestamp": "1762652579.514678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "CreitinGameplays/Llama-3.1-8B-R1-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "CreitinGameplays/Llama-3.1-8B-R1-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.323485019747603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3057485865545513 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18126888217522658 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36215624999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.12516622340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Daemontatox_Llama3.3-70B-CogniLink/20b46645-a1dd-4974-9ad1-444f8ca78481.json b/leaderboard_data/HFOpenLLMv2/meta/Daemontatox_Llama3.3-70B-CogniLink/20b46645-a1dd-4974-9ad1-444f8ca78481.json deleted file mode 100644 index 0635f0137655fa0b5bc9ed8631f8a8c5b6d43b91..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Daemontatox_Llama3.3-70B-CogniLink/20b46645-a1dd-4974-9ad1-444f8ca78481.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_Llama3.3-70B-CogniLink/1762652579.527427", - "retrieved_timestamp": "1762652579.5274282", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/Llama3.3-70B-CogniLink", - "developer": "meta", - "inference_platform": "unknown", - "id": "Daemontatox/Llama3.3-70B-CogniLink" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6931042965996888 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.666832775829349 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44546979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4876979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5172872340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Daemontatox_Llama_cot/01a0a741-5f78-4c31-a743-8e42ba73a22d.json b/leaderboard_data/HFOpenLLMv2/meta/Daemontatox_Llama_cot/01a0a741-5f78-4c31-a743-8e42ba73a22d.json deleted file mode 100644 index 95fdfaf32a40bc7631a4a730ad12d311be060c8d..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/Daemontatox_Llama_cot/01a0a741-5f78-4c31-a743-8e42ba73a22d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_Llama_cot/1762652579.527702", - "retrieved_timestamp": "1762652579.527703", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/Llama_cot", - "developer": "meta", - "inference_platform": "unknown", - "id": "Daemontatox/Llama_cot" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7548781677061308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4838374335391873 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.351811835106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MllamaForConditionalGeneration", - "params_billions": 10.67 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Danielbrdz_Barcenas-Llama3-8b-ORPO/83f9e48d-919e-42ec-8ea4-cc933a1b98f5.json b/leaderboard_data/HFOpenLLMv2/meta/Danielbrdz_Barcenas-Llama3-8b-ORPO/83f9e48d-919e-42ec-8ea4-cc933a1b98f5.json deleted file mode 100644 index 4b24157435105792db65c78bf7e77feb1ec15d52..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Danielbrdz_Barcenas-Llama3-8b-ORPO/83f9e48d-919e-42ec-8ea4-cc933a1b98f5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-Llama3-8b-ORPO/1762652579.534392", - "retrieved_timestamp": "1762652579.534392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Danielbrdz/Barcenas-Llama3-8b-ORPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "Danielbrdz/Barcenas-Llama3-8b-ORPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.737242738156979 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49865578559911244 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4189583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3829787234042553 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/e2d5ee61-4d0a-4925-b3bf-016b8ff6b1b9.json b/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/e2d5ee61-4d0a-4925-b3bf-016b8ff6b1b9.json deleted file mode 100644 index d229f6a7582faabd974e771502a6cb1d92ea11dd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/e2d5ee61-4d0a-4925-b3bf-016b8ff6b1b9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm/1762652579.537201", - "retrieved_timestamp": "1762652579.537202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.31356799957446246 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4762231983114653 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39278125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3208942819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 16.537 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/5e116cf4-1be5-44aa-b266-494b1e4127d3.json b/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/5e116cf4-1be5-44aa-b266-494b1e4127d3.json deleted file mode 100644 index e5818dd9b19b9971c6c470a15380f899cd82ca31..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/5e116cf4-1be5-44aa-b266-494b1e4127d3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B/1762652579.5376909", - "retrieved_timestamp": "1762652579.537696", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36849780803822746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.488693862545088 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43197916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2976230053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/a3b69c21-b6bf-4bf9-9097-ebb26c586829.json b/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/a3b69c21-b6bf-4bf9-9097-ebb26c586829.json deleted file mode 100644 index efb6880512fee10c937b9e3a6e0fb020f7157dea..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/a3b69c21-b6bf-4bf9-9097-ebb26c586829.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B/1762652579.538059", - "retrieved_timestamp": "1762652579.53806", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2506948230694557 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44878062698346727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy 
on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41644791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709441489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 15.664 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/d827463a-19cd-4bf2-8823-399b22b57387.json b/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/d827463a-19cd-4bf2-8823-399b22b57387.json deleted file mode 100644 index 90404d8243d7a503475ef53b996756a48e9b21ee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/d827463a-19cd-4bf2-8823-399b22b57387.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B/1762652579.5383239", - "retrieved_timestamp": "1762652579.538326", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882564927725103 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48860331670972784 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.30244348404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.942 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/efad116f-dfc7-4a63-95b1-c61655cd7f0c.json b/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/efad116f-dfc7-4a63-95b1-c61655cd7f0c.json deleted file mode 100644 index df37c4b3f5c196a1cd0732860bf9654d676279c1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/efad116f-dfc7-4a63-95b1-c61655cd7f0c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B/1762652579.538624", - "retrieved_timestamp": "1762652579.538625", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3436182662003484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47693843531787744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4230833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29695811170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.942 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/5af2dce8-b12c-474c-b9e2-b5a38687772d.json 
b/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/5af2dce8-b12c-474c-b9e2-b5a38687772d.json deleted file mode 100644 index 7e1e592c0af11a945902a014deae6e181035629b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/5af2dce8-b12c-474c-b9e2-b5a38687772d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B/1762652579.539129", - "retrieved_timestamp": "1762652579.539129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2853162940996556 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44623832540838126 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2777593085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 16.537 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/f2b1fc61-a1c4-431c-b507-7d222ac3aedc.json b/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/f2b1fc61-a1c4-431c-b507-7d222ac3aedc.json deleted file mode 100644 index e981e965d6c4a932cb273b9287f55406cc35c88a..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/DavidAU_DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/f2b1fc61-a1c4-431c-b507-7d222ac3aedc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavidAU_DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B/1762652579.5393531", - "retrieved_timestamp": "1762652579.539354", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793135547015253 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4232300476265338 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3559791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2720246010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 18.405 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter0/62d01464-4163-432c-a017-bedf41cba649.json b/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter0/62d01464-4163-432c-a017-bedf41cba649.json deleted file mode 100644 index 17bdfefed0e25356f318fc1e7b6190d74a00d42e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter0/62d01464-4163-432c-a017-bedf41cba649.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter0/1762652579.5443351", - "retrieved_timestamp": "1762652579.5443368", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter0", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavieLion/Llama-3.2-1B-SPIN-iter0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15067687070306784 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29300816789978756 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253324468085106 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter0/a9771320-cc89-43fc-b398-7797505bc4e2.json b/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter0/a9771320-cc89-43fc-b398-7797505bc4e2.json deleted file mode 100644 index f2fb7966ed5e0b36c2abc7807617e905fe73df40..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter0/a9771320-cc89-43fc-b398-7797505bc4e2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter0/1762652579.544659", - "retrieved_timestamp": "1762652579.5446599", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter0", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavieLion/Llama-3.2-1B-SPIN-iter0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.15492338107332987 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29372614029730437 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter1/c380c4b0-7804-4b59-a7e4-700f0a7122b3.json b/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter1/c380c4b0-7804-4b59-a7e4-700f0a7122b3.json deleted file mode 100644 index f231a16b258eadf8103fb3e5d849fefee62f4d9d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter1/c380c4b0-7804-4b59-a7e4-700f0a7122b3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter1/1762652579.5448809", - "retrieved_timestamp": "1762652579.5448818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter1", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavieLion/Llama-3.2-1B-SPIN-iter1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15754642127333254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29402546232087917 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178523936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter2/5723e611-e7e0-47c0-a5ac-162f22690d70.json b/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter2/5723e611-e7e0-47c0-a5ac-162f22690d70.json deleted file mode 100644 index 3ebe8bb648ca5cb3ba5c768df55722f8b4a67928..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter2/5723e611-e7e0-47c0-a5ac-162f22690d70.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter2/1762652579.545113", - "retrieved_timestamp": "1762652579.545114", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter2", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavieLion/Llama-3.2-1B-SPIN-iter2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13761264555822994 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2980340303779312 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444108 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35530208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11286569148936171 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter3/07d16051-fe48-46e6-a47c-806e9f95a92b.json b/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter3/07d16051-fe48-46e6-a47c-806e9f95a92b.json deleted file mode 100644 index 0424479ac0d4c9001a1a4d4579295095d49a5fb6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter3/07d16051-fe48-46e6-a47c-806e9f95a92b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter3/1762652579.54562", - "retrieved_timestamp": "1762652579.545621", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter3", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavieLion/Llama-3.2-1B-SPIN-iter3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1323920530858123 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29722352809482616 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3526666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11286569148936171 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter3/7a91746e-e622-4eef-aef8-5f0ba04f03c9.json b/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter3/7a91746e-e622-4eef-aef8-5f0ba04f03c9.json deleted file mode 100644 index 
16ec888359be7c1142865d6ac57a7654f7316355..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DavieLion_Llama-3.2-1B-SPIN-iter3/7a91746e-e622-4eef-aef8-5f0ba04f03c9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DavieLion_Llama-3.2-1B-SPIN-iter3/1762652579.5453749", - "retrieved_timestamp": "1762652579.545376", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DavieLion/Llama-3.2-1B-SPIN-iter3", - "developer": "meta", - "inference_platform": "unknown", - "id": "DavieLion/Llama-3.2-1B-SPIN-iter3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1335910938531984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29752276438021447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34996875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_Explore_Llama-3.1-8B-Inst/0da22342-b4ef-4dd2-b4f5-327710986701.json b/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_Explore_Llama-3.1-8B-Inst/0da22342-b4ef-4dd2-b4f5-327710986701.json deleted file mode 100644 index 3db9c335d76b6697d46c65f30ac43358dc1b952a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_Explore_Llama-3.1-8B-Inst/0da22342-b4ef-4dd2-b4f5-327710986701.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.1-8B-Inst/1762652579.547036", - "retrieved_timestamp": "1762652579.5470378", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepAutoAI/Explore_Llama-3.1-8B-Inst", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepAutoAI/Explore_Llama-3.1-8B-Inst" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7794828831943688 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.511742159482904 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20090634441087613 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3909583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.379155585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_Explore_Llama-3.2-1B-Inst/f8e00446-f253-4ff3-a9ff-ef182cf9e147.json b/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_Explore_Llama-3.2-1B-Inst/f8e00446-f253-4ff3-a9ff-ef182cf9e147.json deleted file mode 100644 index 068803d82e7d28832e884089c4bc6107ee1deb92..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_Explore_Llama-3.2-1B-Inst/f8e00446-f253-4ff3-a9ff-ef182cf9e147.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst/1762652579.5474088", - "retrieved_timestamp": "1762652579.547411", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5648856146136695 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35048085637770016 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31834375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18085106382978725 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v0/455764e4-7b66-4189-b2e8-907047a92d45.json b/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v0/455764e4-7b66-4189-b2e8-907047a92d45.json deleted file mode 100644 index e3017f72f7b0aa48ac8b70ae41cc2c07cb485e43..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v0/455764e4-7b66-4189-b2e8-907047a92d45.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v0/1762652579.547727", - "retrieved_timestamp": "1762652579.5477278", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5597148898256625 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33650903200352716 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3103125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18035239361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v1.1/40bc60f8-aa35-460b-a7af-b4cccd138c80.json b/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v1.1/40bc60f8-aa35-460b-a7af-b4cccd138c80.json deleted file mode 100644 index 0678fd2c620d11ea3570d0dbf8d4a6080eccd2ff..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v1.1/40bc60f8-aa35-460b-a7af-b4cccd138c80.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v1.1/1762652579.5483131", - "retrieved_timestamp": "1762652579.548314", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5844193406827218 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3512662445055541 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3117083333333333 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18184840425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v1/74f0ecd4-e04a-4775-9551-fc0e9fa40314.json b/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v1/74f0ecd4-e04a-4775-9551-fc0e9fa40314.json deleted file mode 100644 index b7fb140e2490ba940e0a265f8ef70ea443d15565..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v1/74f0ecd4-e04a-4775-9551-fc0e9fa40314.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_Explore_Llama-3.2-1B-Inst_v1/1762652579.548037", - "retrieved_timestamp": "1762652579.548039", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4998891829235318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3141475230443668 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37809374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12691156914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_ldm_soup_Llama-3.1-8B-Inst/a4da2ab3-adb3-405f-9bb7-2164d740d424.json 
b/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_ldm_soup_Llama-3.1-8B-Inst/a4da2ab3-adb3-405f-9bb7-2164d740d424.json deleted file mode 100644 index c72bc026b000bb36dd78820e15493a0dddf465f1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DeepAutoAI_ldm_soup_Llama-3.1-8B-Inst/a4da2ab3-adb3-405f-9bb7-2164d740d424.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_ldm_soup_Llama-3.1-8B-Inst/1762652579.5498", - "retrieved_timestamp": "1762652579.5498009", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.803263119633683 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.512116784464076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41613541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38863031914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DeepMount00_Llama-3-8b-Ita/bee65c80-73f2-46e5-9532-8f92b38c4fc5.json b/leaderboard_data/HFOpenLLMv2/meta/DeepMount00_Llama-3-8b-Ita/bee65c80-73f2-46e5-9532-8f92b38c4fc5.json deleted file mode 100644 index 53079ef6356f9bd41178b2ec04e30f6a6344169c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DeepMount00_Llama-3-8b-Ita/bee65c80-73f2-46e5-9532-8f92b38c4fc5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3-8b-Ita/1762652579.551231", - "retrieved_timestamp": "1762652579.551231", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepMount00/Llama-3-8b-Ita", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepMount00/Llama-3-8b-Ita" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7530297388706411 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.493576505761469 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4267708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38522273936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DeepMount00_Llama-3.1-8b-ITA/1c5ce85b-84f3-4ac4-8a98-9d80659bff18.json b/leaderboard_data/HFOpenLLMv2/meta/DeepMount00_Llama-3.1-8b-ITA/1c5ce85b-84f3-4ac4-8a98-9d80659bff18.json deleted file mode 100644 index baf7f4266373100a80186cb02fe5d27255a4352f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DeepMount00_Llama-3.1-8b-ITA/1c5ce85b-84f3-4ac4-8a98-9d80659bff18.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3.1-8b-ITA/1762652579.5514839", - "retrieved_timestamp": "1762652579.5514848", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepMount00/Llama-3.1-8b-ITA", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepMount00/Llama-3.1-8b-ITA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7916727616058724 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5109356715302854 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41359375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38763297872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DeepMount00_Llama-3.1-8b-ITA/ca297bdd-d804-4c43-bb6e-0b7e230974e2.json b/leaderboard_data/HFOpenLLMv2/meta/DeepMount00_Llama-3.1-8b-ITA/ca297bdd-d804-4c43-bb6e-0b7e230974e2.json deleted file mode 100644 index e6bcbfd0b4cd3dad53e1372cb31894a9b6869125..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DeepMount00_Llama-3.1-8b-ITA/ca297bdd-d804-4c43-bb6e-0b7e230974e2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3.1-8b-Ita/1762652579.551703", - "retrieved_timestamp": "1762652579.5517042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepMount00/Llama-3.1-8b-Ita", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepMount00/Llama-3.1-8b-Ita" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364843060856306 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5169995464792883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44871875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39602726063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Unknown", - "params_billions": 0.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DeepMount00_Llama-3.1-Distilled/6424a285-b3dc-4221-b3ba-5e7922185269.json b/leaderboard_data/HFOpenLLMv2/meta/DeepMount00_Llama-3.1-Distilled/6424a285-b3dc-4221-b3ba-5e7922185269.json deleted file mode 100644 index 22e69f9c533be6b6935f82e026bd40db81c0521a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DeepMount00_Llama-3.1-Distilled/6424a285-b3dc-4221-b3ba-5e7922185269.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepMount00_Llama-3.1-Distilled/1762652579.551904", - "retrieved_timestamp": "1762652579.551905", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepMount00/Llama-3.1-Distilled", - "developer": "meta", - "inference_platform": "unknown", - "id": "DeepMount00/Llama-3.1-Distilled" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7843787816327346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5100875314179011 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40581249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3781582446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/DevQuasar_DevQuasar-R1-Uncensored-Llama-8B/490df557-2f50-434a-a28d-a78a234da9fa.json b/leaderboard_data/HFOpenLLMv2/meta/DevQuasar_DevQuasar-R1-Uncensored-Llama-8B/490df557-2f50-434a-a28d-a78a234da9fa.json deleted file mode 100644 index 5bc30fbc5dbecb8682e8bc620b4b3e684042b133..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/DevQuasar_DevQuasar-R1-Uncensored-Llama-8B/490df557-2f50-434a-a28d-a78a234da9fa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DevQuasar_DevQuasar-R1-Uncensored-Llama-8B/1762652579.555449", - "retrieved_timestamp": "1762652579.5554502", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DevQuasar/DevQuasar-R1-Uncensored-Llama-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "DevQuasar/DevQuasar-R1-Uncensored-Llama-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38488432913558246 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5117943836412089 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33081570996978854 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44357291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3614527925531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Enno-Ai_EnnoAi-Pro-French-Llama-3-8B-v0.4/bbc78d6d-09e3-410a-9bf9-a6dcdbef346e.json 
b/leaderboard_data/HFOpenLLMv2/meta/Enno-Ai_EnnoAi-Pro-French-Llama-3-8B-v0.4/bbc78d6d-09e3-410a-9bf9-a6dcdbef346e.json deleted file mode 100644 index b4fcbe51a6a267bfa3e82bd02df377f6cca28b75..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Enno-Ai_EnnoAi-Pro-French-Llama-3-8B-v0.4/bbc78d6d-09e3-410a-9bf9-a6dcdbef346e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Enno-Ai_EnnoAi-Pro-French-Llama-3-8B-v0.4/1762652579.5956101", - "retrieved_timestamp": "1762652579.5956109", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4", - "developer": "meta", - "inference_platform": "unknown", - "id": "Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4188807918545016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4074954889367559 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41700000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634640957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Enno-Ai_EnnoAi-Pro-Llama-3-8B-v0.3/f1e005a2-b949-4518-b7e5-3fd7af3fcf0f.json b/leaderboard_data/HFOpenLLMv2/meta/Enno-Ai_EnnoAi-Pro-Llama-3-8B-v0.3/f1e005a2-b949-4518-b7e5-3fd7af3fcf0f.json deleted file mode 100644 index b4fa4cb7ec18805b9564203db5578d743cca5c30..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Enno-Ai_EnnoAi-Pro-Llama-3-8B-v0.3/f1e005a2-b949-4518-b7e5-3fd7af3fcf0f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Enno-Ai_EnnoAi-Pro-Llama-3-8B-v0.3/1762652579.596117", - "retrieved_timestamp": "1762652579.596118", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3", - "developer": "meta", - "inference_platform": "unknown", - "id": "Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5082569803676467 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4100577461090639 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42357291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2990359042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Enno-Ai_EnnoAi-Pro-Llama-3-8B/39a6c969-d938-4e4c-9adc-f71f1d30143d.json b/leaderboard_data/HFOpenLLMv2/meta/Enno-Ai_EnnoAi-Pro-Llama-3-8B/39a6c969-d938-4e4c-9adc-f71f1d30143d.json deleted file mode 100644 index cf4b47418d1beb9b34c0458b813c84b383ccfa09..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Enno-Ai_EnnoAi-Pro-Llama-3-8B/39a6c969-d938-4e4c-9adc-f71f1d30143d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Enno-Ai_EnnoAi-Pro-Llama-3-8B/1762652579.5958989", - "retrieved_timestamp": "1762652579.5958998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Enno-Ai/EnnoAi-Pro-Llama-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Enno-Ai/EnnoAi-Pro-Llama-3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31953771548380516 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4151575806137866 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21509308510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Enno-Ai_EnnoAi-Pro-Llama-3.1-8B-v0.9/cf0ca830-4bb6-4317-97ae-380f54518d9f.json b/leaderboard_data/HFOpenLLMv2/meta/Enno-Ai_EnnoAi-Pro-Llama-3.1-8B-v0.9/cf0ca830-4bb6-4317-97ae-380f54518d9f.json deleted file mode 100644 index 42ea4a1cc0d83e30b31b520f7be22fc92295a8f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Enno-Ai_EnnoAi-Pro-Llama-3.1-8B-v0.9/cf0ca830-4bb6-4317-97ae-380f54518d9f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Enno-Ai_EnnoAi-Pro-Llama-3.1-8B-v0.9/1762652579.5963311", - "retrieved_timestamp": "1762652579.596332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9", - "developer": "meta", - "inference_platform": "unknown", - "id": "Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4689147018799009 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41602720836190127 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH 
Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2595578457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EnnoAi_EnnoAi-Pro-Llama-3.1-8B-v1.0/32c712e0-4f63-4188-b4c8-5f37b6101e3f.json b/leaderboard_data/HFOpenLLMv2/meta/EnnoAi_EnnoAi-Pro-Llama-3.1-8B-v1.0/32c712e0-4f63-4188-b4c8-5f37b6101e3f.json deleted file mode 100644 index de79f20412c328ba5dd8a06912807564558bbb98..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EnnoAi_EnnoAi-Pro-Llama-3.1-8B-v1.0/32c712e0-4f63-4188-b4c8-5f37b6101e3f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EnnoAi_EnnoAi-Pro-Llama-3.1-8B-v1.0/1762652579.596818", - "retrieved_timestamp": "1762652579.596819", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4704384366813389 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41602720836190127 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2595578457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1-8B-Philos/392ea212-afd9-44a3-a6bb-2bba8f124492.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1-8B-Philos/392ea212-afd9-44a3-a6bb-2bba8f124492.json deleted file mode 100644 index 13a6d999b683effaab2d3aa90731412742cfd4e8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1-8B-Philos/392ea212-afd9-44a3-a6bb-2bba8f124492.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1-8B-Philos/1762652579.6100821", - "retrieved_timestamp": "1762652579.610083", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.498640274471735 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4977581192690881 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42766666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3405917553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of 
file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.01-8B-Philos/536229bc-b1fb-4078-826c-074b09c362b9.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.01-8B-Philos/536229bc-b1fb-4078-826c-074b09c362b9.json deleted file mode 100644 index cb04f2f2caa570097304b898872de75df66ed7d1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.01-8B-Philos/536229bc-b1fb-4078-826c-074b09c362b9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.01-8B-Philos/1762652579.610341", - "retrieved_timestamp": "1762652579.610341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42117913802045237 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49561092312727917 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13595166163141995 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43706249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33834773936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.03-8B-Philos/b77a4371-97d7-43a0-892f-a0c01c2b8528.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.03-8B-Philos/b77a4371-97d7-43a0-892f-a0c01c2b8528.json deleted file mode 100644 index 459538ccaf8f6053f575878b8bb297661ba02e48..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.03-8B-Philos/b77a4371-97d7-43a0-892f-a0c01c2b8528.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.03-8B-Philos/1762652579.6105568", - "retrieved_timestamp": "1762652579.610558", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3880814017916905 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49508699339363266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42801041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3355219414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.04-8B-Philos/de05ec0d-805d-4aa5-8ec3-1dc7446e6c1a.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.04-8B-Philos/de05ec0d-805d-4aa5-8ec3-1dc7446e6c1a.json deleted file mode 100644 index c7b55a98bd4762cfe128399c5ae8b5d4df6f3efc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.04-8B-Philos/de05ec0d-805d-4aa5-8ec3-1dc7446e6c1a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.04-8B-Philos/1762652579.6107578", - "retrieved_timestamp": "1762652579.610759", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40843960690966635 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4930009712421776 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43721875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3402593085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/2790feab-6850-4d51-a3a1-78ada0c56d03.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/2790feab-6850-4d51-a3a1-78ada0c56d03.json deleted file mode 100644 index acbedd16d825592dfd3b62797ff3ddb9f8237459..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/2790feab-6850-4d51-a3a1-78ada0c56d03.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.07-8B-Philos-Math/1762652579.611186", - "retrieved_timestamp": "1762652579.611187", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5079079065767719 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4847020640542447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40630208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35305851063829785 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/42a38b08-6eb7-449d-99c5-cb0b2b76dd06.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/42a38b08-6eb7-449d-99c5-cb0b2b76dd06.json deleted file mode 100644 index c270309724246c1cb19bcafa6a32796847a635aa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/42a38b08-6eb7-449d-99c5-cb0b2b76dd06.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection/1762652579.611454", - "retrieved_timestamp": "1762652579.611454", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39522577871159636 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.49553052334314723 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12462235649546828 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4048125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35929188829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/9ce9031b-76fd-4c33-b209-3011643d9266.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/9ce9031b-76fd-4c33-b209-3011643d9266.json deleted file mode 100644 index c04f4bc9947052e70ee9bdae707bc2e02fc7a79a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/9ce9031b-76fd-4c33-b209-3011643d9266.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1/1762652579.611669", - "retrieved_timestamp": "1762652579.61167", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316382753316755 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4827931104634334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4103020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523105053191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Llama-3.1-8B-Philos-Reflection/5ea20ab3-9d05-43f1-a276-7acbd2229fe8.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Llama-3.1-8B-Philos-Reflection/5ea20ab3-9d05-43f1-a276-7acbd2229fe8.json deleted file mode 100644 index 47f8ba0dc566f6b161767edb874b5ed1aed73182..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI2_Fireball-Llama-3.1-8B-Philos-Reflection/5ea20ab3-9d05-43f1-a276-7acbd2229fe8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Llama-3.1-8B-Philos-Reflection/1762652579.6118872", - "retrieved_timestamp": "1762652579.6118872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3596047376516532 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4897693552241443 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3957291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3550531914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Alpaca-Llama3.1-8B/cd4698d8-e9d0-4a00-855a-6e0b9cfc31d8.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Alpaca-Llama3.1-8B/cd4698d8-e9d0-4a00-855a-6e0b9cfc31d8.json deleted file mode 100644 index 5d8bb879c15cb073a319663add82eede5c8c240e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Alpaca-Llama3.1-8B/cd4698d8-e9d0-4a00-855a-6e0b9cfc31d8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Alpaca-Llama3.1-8B/1762652579.5979578", - "retrieved_timestamp": "1762652579.5979588", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Alpaca-Llama3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Alpaca-Llama3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15986914719610634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47552608539742874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3402604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3246343085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/88e9cdd1-ad46-4ad0-9e9b-d872cdb63257.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/88e9cdd1-ad46-4ad0-9e9b-d872cdb63257.json deleted file mode 
100644 index ebfcd921bd65782bf89c3a59c6f24435a8f88320..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/88e9cdd1-ad46-4ad0-9e9b-d872cdb63257.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta/1762652579.600618", - "retrieved_timestamp": "1762652579.600619", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7274010735958367 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48648902139668476 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15256797583081572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3619375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3543051861702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/60d939fa-9ae2-4226-a955-d586c27fea68.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/60d939fa-9ae2-4226-a955-d586c27fea68.json deleted file mode 100644 index 4b475532860d41f96cf419f93c2e53e07a60ee6c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/60d939fa-9ae2-4226-a955-d586c27fea68.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2/1762652579.600828", - "retrieved_timestamp": "1762652579.600829", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46731561146646455 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4932027479020209 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46236458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351894946808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Fireball-R1-Llama-3.1-8B-Medical-COT/1bfd3789-e95b-487c-9c8a-516c017f6558.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Fireball-R1-Llama-3.1-8B-Medical-COT/1bfd3789-e95b-487c-9c8a-516c017f6558.json deleted file mode 100644 index 9b3f9bc1c110727ddcf94c0b81e2321d52709bf0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Fireball-R1-Llama-3.1-8B-Medical-COT/1bfd3789-e95b-487c-9c8a-516c017f6558.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-R1-Llama-3.1-8B-Medical-COT/1762652579.603883", - "retrieved_timestamp": "1762652579.603883", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT", - "developer": "meta", - "inference_platform": "unknown", - "id": 
"EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3216111029845255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37162741490176326 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3270392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31136458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1402094414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Fireball-R1-Llama-3.1-8B/85ff1b65-eade-4d70-a278-99605f324e5a.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Fireball-R1-Llama-3.1-8B/85ff1b65-eade-4d70-a278-99605f324e5a.json deleted file mode 100644 index 3da88601a89a0e5f43af9b43463070a6fc05456a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Fireball-R1-Llama-3.1-8B/85ff1b65-eade-4d70-a278-99605f324e5a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-R1-Llama-3.1-8B/1762652579.603668", - "retrieved_timestamp": "1762652579.603669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-R1-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-R1-Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4427363839058143 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.36434977901496834 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32879166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11153590425531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Fireball-R1.1-Llama-3.1-8B/5938f7d8-dddb-4989-81c6-e57e177e52c9.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Fireball-R1.1-Llama-3.1-8B/5938f7d8-dddb-4989-81c6-e57e177e52c9.json deleted file mode 100644 index e88f2d7be21b6873517e1387fa0cff7ce3d8f6e2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Fireball-R1.1-Llama-3.1-8B/5938f7d8-dddb-4989-81c6-e57e177e52c9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-R1.1-Llama-3.1-8B/1762652579.604102", - "retrieved_timestamp": "1762652579.604102", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-R1.1-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-R1.1-Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3676234613048932 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33260007841271594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3419375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11153590425531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Llama-3.2-3B-Agent007-Coder/ab812077-8d2b-40f8-bc49-65fffd7f6f26.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Llama-3.2-3B-Agent007-Coder/ab812077-8d2b-40f8-bc49-65fffd7f6f26.json deleted file mode 100644 index b2262271c767be4891301eee92ebace688eda828..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Llama-3.2-3B-Agent007-Coder/ab812077-8d2b-40f8-bc49-65fffd7f6f26.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Llama-3.2-3B-Agent007-Coder/1762652579.6043148", - "retrieved_timestamp": "1762652579.6043148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Llama-3.2-3B-Agent007-Coder", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Llama-3.2-3B-Agent007-Coder" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399562050913798 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303758760727905 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36680208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28515625 - } - } - ], - "additional_details": { - "precision": "float16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 3.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_OpenReasoner-Llama-3.2-3B-rs1.0/610f3053-b2a9-45a8-ac09-af3edcb8c826.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_OpenReasoner-Llama-3.2-3B-rs1.0/610f3053-b2a9-45a8-ac09-af3edcb8c826.json deleted file mode 100644 index e813e5e9305ee3f14e611e89dd854c50f85c72a0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_OpenReasoner-Llama-3.2-3B-rs1.0/610f3053-b2a9-45a8-ac09-af3edcb8c826.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_OpenReasoner-Llama-3.2-3B-rs1.0/1762652579.604741", - "retrieved_timestamp": "1762652579.6047418", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7274010735958367 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45185934849403964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3460625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31341422872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/14560449-0481-4346-aab2-ff75fdab691b.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/14560449-0481-4346-aab2-ff75fdab691b.json deleted file mode 100644 index e27c739a7ddacfe2f357e247dd97877c84a17cd0..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/14560449-0481-4346-aab2-ff75fdab691b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO/1762652579.606164", - "retrieved_timestamp": "1762652579.606165", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4553263119633683 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4804219047211424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.393125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3597905585106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Reasoning-Llama-3.1-CoT-RE1-NMT/807ed760-775e-4082-90ea-7b524038bebf.json b/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Reasoning-Llama-3.1-CoT-RE1-NMT/807ed760-775e-4082-90ea-7b524038bebf.json deleted file mode 100644 index 83a5b6b0ae17b600334169448352808b3a1add39..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/EpistemeAI_Reasoning-Llama-3.1-CoT-RE1-NMT/807ed760-775e-4082-90ea-7b524038bebf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.1-CoT-RE1-NMT/1762652579.6059399", - "retrieved_timestamp": "1762652579.605941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT", - "developer": "meta", - "inference_platform": "unknown", - "id": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4828532737580731 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47357563863974517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31821875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33427526595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Etherll_Herplete-LLM-Llama-3.1-8b-Ties/febdde9e-8e67-458b-be79-6a9c91a7237a.json b/leaderboard_data/HFOpenLLMv2/meta/Etherll_Herplete-LLM-Llama-3.1-8b-Ties/febdde9e-8e67-458b-be79-6a9c91a7237a.json deleted file mode 100644 index 7b6c24f3e8ca2a43adf4d7416c2aa31e8a5b9915..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Etherll_Herplete-LLM-Llama-3.1-8b-Ties/febdde9e-8e67-458b-be79-6a9c91a7237a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Etherll_Herplete-LLM-Llama-3.1-8b-Ties/1762652579.614388", - "retrieved_timestamp": "1762652579.614389", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Etherll/Herplete-LLM-Llama-3.1-8b-Ties", - "developer": "meta", - "inference_platform": "unknown", - "id": "Etherll/Herplete-LLM-Llama-3.1-8b-Ties" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.6163679038285084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5337975953250876 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40171874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375249335106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Etherll_Herplete-LLM-Llama-3.1-8b/3d70d2d7-1510-45de-93dc-1ba93cb0f24a.json b/leaderboard_data/HFOpenLLMv2/meta/Etherll_Herplete-LLM-Llama-3.1-8b/3d70d2d7-1510-45de-93dc-1ba93cb0f24a.json deleted file mode 100644 index ae3bc41e08d79bde2dacf2ac7be662cae902877e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Etherll_Herplete-LLM-Llama-3.1-8b/3d70d2d7-1510-45de-93dc-1ba93cb0f24a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Etherll_Herplete-LLM-Llama-3.1-8b/1762652579.614203", - "retrieved_timestamp": "1762652579.614203", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Etherll/Herplete-LLM-Llama-3.1-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Etherll/Herplete-LLM-Llama-3.1-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6105976586568084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5347253355929804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, 
- { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3990520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375249335106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Etherll_Herplete-LLM-Llama-3.1-8b/52e6e50e-4621-491f-9e46-8d6d398c4344.json b/leaderboard_data/HFOpenLLMv2/meta/Etherll_Herplete-LLM-Llama-3.1-8b/52e6e50e-4621-491f-9e46-8d6d398c4344.json deleted file mode 100644 index 2482b4a7c1f41350450dd72a8bcc1900ce53e08f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Etherll_Herplete-LLM-Llama-3.1-8b/52e6e50e-4621-491f-9e46-8d6d398c4344.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Etherll_Herplete-LLM-Llama-3.1-8b/1762652579.613958", - "retrieved_timestamp": "1762652579.6139588", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Etherll/Herplete-LLM-Llama-3.1-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Etherll/Herplete-LLM-Llama-3.1-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46719149634082013 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5013428726325629 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38599999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34815492021276595 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Etherll_Replete-LLM-V3-Llama-3.1-8b/66846c9d-e2bc-416d-95b4-fed31d1b781b.json b/leaderboard_data/HFOpenLLMv2/meta/Etherll_Replete-LLM-V3-Llama-3.1-8b/66846c9d-e2bc-416d-95b4-fed31d1b781b.json deleted file mode 100644 index b430272087b6b8d12328959df143dbb776e4327e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Etherll_Replete-LLM-V3-Llama-3.1-8b/66846c9d-e2bc-416d-95b4-fed31d1b781b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Etherll_Replete-LLM-V3-Llama-3.1-8b/1762652579.6150668", - "retrieved_timestamp": "1762652579.615068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Etherll/Replete-LLM-V3-Llama-3.1-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Etherll/Replete-LLM-V3-Llama-3.1-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5262924595628488 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4543377420594779 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22734138972809667 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3516458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34699135638297873 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Eurdem_Defne-llama3.1-8B/52eb695b-3d17-4abe-a386-7927348e5dd5.json b/leaderboard_data/HFOpenLLMv2/meta/Eurdem_Defne-llama3.1-8B/52eb695b-3d17-4abe-a386-7927348e5dd5.json deleted 
file mode 100644 index fe0407f5a9eea1f9aaf3d42331f37d3f69f762e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Eurdem_Defne-llama3.1-8B/52eb695b-3d17-4abe-a386-7927348e5dd5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Eurdem_Defne-llama3.1-8B/1762652579.615498", - "retrieved_timestamp": "1762652579.615499", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Eurdem/Defne-llama3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Eurdem/Defne-llama3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5036115285220991 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5320979090308238 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43309375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3865525265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/GenVRadmin_llama38bGenZ_Vikas-Merged/22a01298-038f-4069-b847-43409d2d4baa.json b/leaderboard_data/HFOpenLLMv2/meta/GenVRadmin_llama38bGenZ_Vikas-Merged/22a01298-038f-4069-b847-43409d2d4baa.json deleted file mode 100644 index c77ab2d8b9b121a5c609554b3e3a83a3568c91fe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/GenVRadmin_llama38bGenZ_Vikas-Merged/22a01298-038f-4069-b847-43409d2d4baa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/GenVRadmin_llama38bGenZ_Vikas-Merged/1762652579.627924", - "retrieved_timestamp": "1762652579.627925", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "GenVRadmin/llama38bGenZ_Vikas-Merged", - "developer": "meta", - "inference_platform": "unknown", - "id": "GenVRadmin/llama38bGenZ_Vikas-Merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30002947734234053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4535981003984562 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44016666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26221742021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Groq_Llama-3-Groq-8B-Tool-Use/636b3b4a-dc1f-4008-83ba-0d83fdcd5acb.json b/leaderboard_data/HFOpenLLMv2/meta/Groq_Llama-3-Groq-8B-Tool-Use/636b3b4a-dc1f-4008-83ba-0d83fdcd5acb.json deleted file mode 100644 index 13282d500be4aea7645f1c0285d9fe98b4550190..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Groq_Llama-3-Groq-8B-Tool-Use/636b3b4a-dc1f-4008-83ba-0d83fdcd5acb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Groq_Llama-3-Groq-8B-Tool-Use/1762652579.633301", - "retrieved_timestamp": "1762652579.633302", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Groq/Llama-3-Groq-8B-Tool-Use", - "developer": "meta", - "inference_platform": "unknown", - "id": "Groq/Llama-3-Groq-8B-Tool-Use" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6098230472922956 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4863384977901497 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33992686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Gryphe_Pantheon-RP-1.0-8b-Llama-3/a3abb802-acd8-49c7-bcff-3b79a4023d96.json b/leaderboard_data/HFOpenLLMv2/meta/Gryphe_Pantheon-RP-1.0-8b-Llama-3/a3abb802-acd8-49c7-bcff-3b79a4023d96.json deleted file mode 100644 index 8c8ebceaa02e49c5ec264e06cfed0ac78a096673..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Gryphe_Pantheon-RP-1.0-8b-Llama-3/a3abb802-acd8-49c7-bcff-3b79a4023d96.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.0-8b-Llama-3/1762652579.633556", - "retrieved_timestamp": "1762652579.633556", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Gryphe/Pantheon-RP-1.0-8b-Llama-3", - "developer": "meta", - "inference_platform": "unknown", - "id": "Gryphe/Pantheon-RP-1.0-8b-Llama-3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39325212657969744 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4539075127777334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3832395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30668218085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/HPAI-BSC_Llama3-Aloe-8B-Alpha/10d1f626-64f0-4f43-9597-1221cf94c948.json b/leaderboard_data/HFOpenLLMv2/meta/HPAI-BSC_Llama3-Aloe-8B-Alpha/10d1f626-64f0-4f43-9597-1221cf94c948.json deleted file mode 100644 index b483da9b867a70834c3739549f3343c5e248e1b1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/HPAI-BSC_Llama3-Aloe-8B-Alpha/10d1f626-64f0-4f43-9597-1221cf94c948.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HPAI-BSC_Llama3-Aloe-8B-Alpha/1762652579.6361432", - "retrieved_timestamp": "1762652579.6361442", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HPAI-BSC/Llama3-Aloe-8B-Alpha", - "developer": "meta", - "inference_platform": "unknown", - "id": "HPAI-BSC/Llama3-Aloe-8B-Alpha" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5081073773144147 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48308532966126966 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3672708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3295378989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/HPAI-BSC_Llama3.1-Aloe-Beta-8B/d7410909-8a7c-4afb-9cab-2537f837a9a1.json b/leaderboard_data/HFOpenLLMv2/meta/HPAI-BSC_Llama3.1-Aloe-Beta-8B/d7410909-8a7c-4afb-9cab-2537f837a9a1.json deleted file mode 100644 index 66a257c9a4cb28fde204c0ab11bcbbbbccbf954c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/HPAI-BSC_Llama3.1-Aloe-Beta-8B/d7410909-8a7c-4afb-9cab-2537f837a9a1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HPAI-BSC_Llama3.1-Aloe-Beta-8B/1762652579.636478", - "retrieved_timestamp": "1762652579.636513", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HPAI-BSC/Llama3.1-Aloe-Beta-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "HPAI-BSC/Llama3.1-Aloe-Beta-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7253276860951166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5092760762748857 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3834583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35804521276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Hastagaras_Llama-3.1-Jamet-8B-MK.I/be7d90fa-86be-4f3b-a3ef-2e1475b7bd64.json b/leaderboard_data/HFOpenLLMv2/meta/Hastagaras_Llama-3.1-Jamet-8B-MK.I/be7d90fa-86be-4f3b-a3ef-2e1475b7bd64.json deleted file mode 100644 index 
07983eeeef7fcde8b310a4b69c65fcfd2ed2629d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Hastagaras_Llama-3.1-Jamet-8B-MK.I/be7d90fa-86be-4f3b-a3ef-2e1475b7bd64.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Hastagaras_Llama-3.1-Jamet-8B-MK.I/1762652579.637886", - "retrieved_timestamp": "1762652579.637887", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Hastagaras/Llama-3.1-Jamet-8B-MK.I", - "developer": "meta", - "inference_platform": "unknown", - "id": "Hastagaras/Llama-3.1-Jamet-8B-MK.I" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7338207068356406 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048666433733161 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3726041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3482380319148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Hastagaras_Zabuza-8B-Llama-3.1/fb698ce2-d422-46eb-aa98-17fb7645461a.json b/leaderboard_data/HFOpenLLMv2/meta/Hastagaras_Zabuza-8B-Llama-3.1/fb698ce2-d422-46eb-aa98-17fb7645461a.json deleted file mode 100644 index c006aae5dcb8ea905c96f1677ae9a874c5cbf60b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Hastagaras_Zabuza-8B-Llama-3.1/fb698ce2-d422-46eb-aa98-17fb7645461a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Hastagaras_Zabuza-8B-Llama-3.1/1762652579.638141", - "retrieved_timestamp": "1762652579.6381419", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Hastagaras/Zabuza-8B-Llama-3.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Hastagaras/Zabuza-8B-Llama-3.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6265342624237025 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4538915742546196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29230385638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/HiroseKoichi_Llama-Salad-4x8B-V3/69037dce-5276-4e26-aa05-0a7bd2c4739b.json b/leaderboard_data/HFOpenLLMv2/meta/HiroseKoichi_Llama-Salad-4x8B-V3/69037dce-5276-4e26-aa05-0a7bd2c4739b.json deleted file mode 100644 index 358d6e0a5cb4bb0ac67268a9c2d3e0a9f4ee8113..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/HiroseKoichi_Llama-Salad-4x8B-V3/69037dce-5276-4e26-aa05-0a7bd2c4739b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HiroseKoichi_Llama-Salad-4x8B-V3/1762652579.640251", - "retrieved_timestamp": "1762652579.6402519", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HiroseKoichi/Llama-Salad-4x8B-V3", - "developer": "meta", - "inference_platform": "unknown", - "id": "HiroseKoichi/Llama-Salad-4x8B-V3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6653523761397536 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244649789001753 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.351811835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.942 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/HoangHa_Pensez-Llama3.1-8B/d27e73c5-654c-48c6-ad60-652a60bda72c.json b/leaderboard_data/HFOpenLLMv2/meta/HoangHa_Pensez-Llama3.1-8B/d27e73c5-654c-48c6-ad60-652a60bda72c.json deleted file mode 100644 index 7e8fadcfa182b35f1693cc1e97877841824c47f2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/HoangHa_Pensez-Llama3.1-8B/d27e73c5-654c-48c6-ad60-652a60bda72c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HoangHa_Pensez-Llama3.1-8B/1762652579.640512", - "retrieved_timestamp": "1762652579.640512", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HoangHa/Pensez-Llama3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "HoangHa/Pensez-Llama3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3886809221753835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46691313514505667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3596979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31258311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/IDEA-CCNL_Ziya-LLaMA-13B-v1/98616cce-563a-4977-b5c0-bf8df3102303.json b/leaderboard_data/HFOpenLLMv2/meta/IDEA-CCNL_Ziya-LLaMA-13B-v1/98616cce-563a-4977-b5c0-bf8df3102303.json deleted file mode 100644 index 28a38220d3db802aeda1c27e43f3fc399d275100..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/IDEA-CCNL_Ziya-LLaMA-13B-v1/98616cce-563a-4977-b5c0-bf8df3102303.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/IDEA-CCNL_Ziya-LLaMA-13B-v1/1762652579.645581", - "retrieved_timestamp": "1762652579.645581", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "IDEA-CCNL/Ziya-LLaMA-13B-v1", - "developer": "meta", - "inference_platform": "unknown", - "id": "IDEA-CCNL/Ziya-LLaMA-13B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16968643200042555 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28770292445409473 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37505208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.11012300531914894 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Infinirc_Infinirc-Llama3-8B-2G-Release-v1.0/8c8a47f2-c8cf-4ea8-b0ee-0180aeb1b9f0.json b/leaderboard_data/HFOpenLLMv2/meta/Infinirc_Infinirc-Llama3-8B-2G-Release-v1.0/8c8a47f2-c8cf-4ea8-b0ee-0180aeb1b9f0.json deleted file mode 100644 index b728423371252f7b19cb1b5bcf4afbdab5007b17..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Infinirc_Infinirc-Llama3-8B-2G-Release-v1.0/8c8a47f2-c8cf-4ea8-b0ee-0180aeb1b9f0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Infinirc_Infinirc-Llama3-8B-2G-Release-v1.0/1762652579.6465652", - "retrieved_timestamp": "1762652579.6465652", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20243398626754788 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43507435668237937 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4609375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21600731382978725 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/IntervitensInc_internlm2_5-20b-llamafied/5be7b084-b018-457a-a5d6-c9e3e9d3f70e.json b/leaderboard_data/HFOpenLLMv2/meta/IntervitensInc_internlm2_5-20b-llamafied/5be7b084-b018-457a-a5d6-c9e3e9d3f70e.json deleted file mode 100644 index 
66c9c838cc22ba00f58cf3f36c0435add76eccb2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/IntervitensInc_internlm2_5-20b-llamafied/5be7b084-b018-457a-a5d6-c9e3e9d3f70e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/IntervitensInc_internlm2_5-20b-llamafied/1762652579.6480021", - "retrieved_timestamp": "1762652579.648003", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "IntervitensInc/internlm2_5-20b-llamafied", - "developer": "meta", - "inference_platform": "unknown", - "id": "IntervitensInc/internlm2_5-20b-llamafied" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3409952260003457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7478466526577329 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1714501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44754166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4050864361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 19.861 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/JackFram_llama-160m/11a0fc6d-5370-456e-8c01-5d7ed19e4b59.json b/leaderboard_data/HFOpenLLMv2/meta/JackFram_llama-160m/11a0fc6d-5370-456e-8c01-5d7ed19e4b59.json deleted file mode 100644 index 86e36ecea884bd60c9e01a628d7a3386b8e097ae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/JackFram_llama-160m/11a0fc6d-5370-456e-8c01-5d7ed19e4b59.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JackFram_llama-160m/1762652579.649858", - "retrieved_timestamp": "1762652579.649858", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - 
}, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JackFram/llama-160m", - "developer": "meta", - "inference_platform": "unknown", - "id": "JackFram/llama-160m" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1791036671586945 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28880217539042424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3792083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.162 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/JackFram_llama-68m/3b05e3fd-4bf0-42a3-8dc5-13292ece8c77.json b/leaderboard_data/HFOpenLLMv2/meta/JackFram_llama-68m/3b05e3fd-4bf0-42a3-8dc5-13292ece8c77.json deleted file mode 100644 index f4816cbdcea8fe16d4e44c308dbd97cf68ee670b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/JackFram_llama-68m/3b05e3fd-4bf0-42a3-8dc5-13292ece8c77.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/JackFram_llama-68m/1762652579.650121", - "retrieved_timestamp": "1762652579.650121", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "JackFram/llama-68m", - "developer": "meta", - "inference_platform": "unknown", - "id": "JackFram/llama-68m" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17263416623448008 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29362986509336414 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3909895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11436170212765957 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.068 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Joseph717171_Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/26dd2a1f-27ae-4311-9b80-f5a8f0fa456a.json b/leaderboard_data/HFOpenLLMv2/meta/Joseph717171_Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/26dd2a1f-27ae-4311-9b80-f5a8f0fa456a.json deleted file mode 100644 index c007855ad9f57e082a76214de6dce98ef2f76876..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Joseph717171_Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/26dd2a1f-27ae-4311-9b80-f5a8f0fa456a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Joseph717171_Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32/1762652579.694483", - "retrieved_timestamp": "1762652579.694484", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32", - "developer": "meta", - "inference_platform": "unknown", - "id": "Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6185410266980501 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5177452540141246 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4369375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31441156914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Joseph717171_Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/e5843711-00cb-4167-a47d-4874af0c3ba2.json b/leaderboard_data/HFOpenLLMv2/meta/Joseph717171_Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/e5843711-00cb-4167-a47d-4874af0c3ba2.json deleted file mode 100644 index c70d1b52f86370a8d57fb3c4d8cda2ae694bf9e8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Joseph717171_Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/e5843711-00cb-4167-a47d-4874af0c3ba2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Joseph717171_Llama-3.1-SuperNova-8B-Lite_TIES_with_Base/1762652579.6947358", - "retrieved_timestamp": "1762652579.694737", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base", - "developer": "meta", - "inference_platform": "unknown", - "id": "Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8096328851890761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5147423127141911 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18353474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4109895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38804853723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_Differential-Attention-Liquid-Metal-Tinyllama/670580f3-ca8a-473d-a3df-8c01952bda00.json b/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_Differential-Attention-Liquid-Metal-Tinyllama/670580f3-ca8a-473d-a3df-8c01952bda00.json deleted file mode 100644 index 699771a1d6c0fece0ea9ad1dfacc09b85be3e1a3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_Differential-Attention-Liquid-Metal-Tinyllama/670580f3-ca8a-473d-a3df-8c01952bda00.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Josephgflowers_Differential-Attention-Liquid-Metal-Tinyllama/1762652579.695199", - "retrieved_timestamp": "1762652579.6952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama", - "developer": "meta", - "inference_platform": "unknown", - "id": "Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22269245601670234 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.292556113105267 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33555208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12142619680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_TinyLlama-Cinder-Agent-v1/00332c0d-d698-4ecd-9c2d-5f56921709d5.json b/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_TinyLlama-Cinder-Agent-v1/00332c0d-d698-4ecd-9c2d-5f56921709d5.json deleted file mode 100644 index 9077feb28eb7582d80ba3b272e1f1878a57d1eb3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_TinyLlama-Cinder-Agent-v1/00332c0d-d698-4ecd-9c2d-5f56921709d5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Josephgflowers_TinyLlama-Cinder-Agent-v1/1762652579.695456", - "retrieved_timestamp": "1762652579.695457", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Josephgflowers/TinyLlama-Cinder-Agent-v1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Josephgflowers/TinyLlama-Cinder-Agent-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26695612087040166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31160367351776513 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33945833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11610704787234043 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_TinyLlama-v1.1-Cinders-World/2b993039-8980-4578-a9e2-a22a39385664.json b/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_TinyLlama-v1.1-Cinders-World/2b993039-8980-4578-a9e2-a22a39385664.json deleted file mode 
100644 index 4f9c664a56d656919908ab926157ee30cb65f523..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_TinyLlama-v1.1-Cinders-World/2b993039-8980-4578-a9e2-a22a39385664.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Josephgflowers_TinyLlama-v1.1-Cinders-World/1762652579.6958752", - "retrieved_timestamp": "1762652579.6958761", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Josephgflowers/TinyLlama-v1.1-Cinders-World", - "developer": "meta", - "inference_platform": "unknown", - "id": "Josephgflowers/TinyLlama-v1.1-Cinders-World" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24692260978647768 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29979653176003074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3356145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11984707446808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_TinyLlama_v1.1_math_code-world-test-1/72cf7999-e4cb-4987-a694-cdcfae37bb02.json b/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_TinyLlama_v1.1_math_code-world-test-1/72cf7999-e4cb-4987-a694-cdcfae37bb02.json deleted file mode 100644 index 251cbf694d061032b7eba6e4bb64b3426a6ec5d0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_TinyLlama_v1.1_math_code-world-test-1/72cf7999-e4cb-4987-a694-cdcfae37bb02.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Josephgflowers_TinyLlama_v1.1_math_code-world-test-1/1762652579.696125", - "retrieved_timestamp": "1762652579.696125", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Josephgflowers/TinyLlama_v1.1_math_code-world-test-1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Josephgflowers/TinyLlama_v1.1_math_code-world-test-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00784363267242029 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31463497508928434 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23406040268456377 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34990625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11319813829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_Tinyllama-STEM-Cinder-Agent-v1/0c22748e-74ad-4bac-a714-c64a19a88af7.json b/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_Tinyllama-STEM-Cinder-Agent-v1/0c22748e-74ad-4bac-a714-c64a19a88af7.json deleted file mode 100644 index 765068c86b115607965e5148dff41287cc3a12c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_Tinyllama-STEM-Cinder-Agent-v1/0c22748e-74ad-4bac-a714-c64a19a88af7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Josephgflowers_Tinyllama-STEM-Cinder-Agent-v1/1762652579.696357", - "retrieved_timestamp": "1762652579.696357", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1", - "developer": "meta", - "inference_platform": "unknown", - "id": 
"Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21257596510591897 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30843808427144626 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.334125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10862699468085106 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_Tinyllama-r1/4293bc9f-4968-4af9-acd2-0ada64be43d4.json b/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_Tinyllama-r1/4293bc9f-4968-4af9-acd2-0ada64be43d4.json deleted file mode 100644 index d9debcce2d5245e41b8b2974677bd378e7910452..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Josephgflowers_Tinyllama-r1/4293bc9f-4968-4af9-acd2-0ada64be43d4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Josephgflowers_Tinyllama-r1/1762652579.6965919", - "retrieved_timestamp": "1762652579.6965928", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Josephgflowers/Tinyllama-r1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Josephgflowers/Tinyllama-r1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2119265770378152 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3014631984266974 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33148958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11344747340425532 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/KingNish_Reasoning-Llama-3b-v0.1/5f6f312f-3131-417d-b12e-3e30bb998d27.json b/leaderboard_data/HFOpenLLMv2/meta/KingNish_Reasoning-Llama-3b-v0.1/5f6f312f-3131-417d-b12e-3e30bb998d27.json deleted file mode 100644 index 5709704cf739028f479c1e52a3c4c3f6db4d34e8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/KingNish_Reasoning-Llama-3b-v0.1/5f6f312f-3131-417d-b12e-3e30bb998d27.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/KingNish_Reasoning-Llama-3b-v0.1/1762652579.69997", - "retrieved_timestamp": "1762652579.699971", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "KingNish/Reasoning-Llama-3b-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "KingNish/Reasoning-Llama-3b-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6224628430342602 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43433592509582786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31676041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3029421542553192 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Kukedlc_NeuralLLaMa-3-8b-DT-v0.1/ec1bea6a-91e2-41c9-ab54-af84bf1a1d15.json b/leaderboard_data/HFOpenLLMv2/meta/Kukedlc_NeuralLLaMa-3-8b-DT-v0.1/ec1bea6a-91e2-41c9-ab54-af84bf1a1d15.json deleted file mode 100644 index 91395f060b01242581471ecd8cf6db949a0deb6b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Kukedlc_NeuralLLaMa-3-8b-DT-v0.1/ec1bea6a-91e2-41c9-ab54-af84bf1a1d15.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralLLaMa-3-8b-DT-v0.1/1762652579.7021902", - "retrieved_timestamp": "1762652579.702191", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Kukedlc/NeuralLLaMa-3-8b-DT-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Kukedlc/NeuralLLaMa-3-8b-DT-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371412297149342 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4986771544360115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40711458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.379155585106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/meta/Kukedlc_NeuralLLaMa-3-8b-ORPO-v0.3/02d060d9-d545-445b-8d22-4ae117b8f324.json b/leaderboard_data/HFOpenLLMv2/meta/Kukedlc_NeuralLLaMa-3-8b-ORPO-v0.3/02d060d9-d545-445b-8d22-4ae117b8f324.json deleted file mode 100644 index c3ddbe96c2e85d6a4b8bbe101040ca1cd7b054ac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Kukedlc_NeuralLLaMa-3-8b-ORPO-v0.3/02d060d9-d545-445b-8d22-4ae117b8f324.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralLLaMa-3-8b-ORPO-v0.3/1762652579.7024388", - "retrieved_timestamp": "1762652579.70244", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3", - "developer": "meta", - "inference_platform": "unknown", - "id": "Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5275912356990563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4557141539616392 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37003125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3056848404255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/LEESM_llama-2-7b-hf-lora-oki100p/13881952-9fe3-4308-93d5-912e59465d6e.json b/leaderboard_data/HFOpenLLMv2/meta/LEESM_llama-2-7b-hf-lora-oki100p/13881952-9fe3-4308-93d5-912e59465d6e.json deleted file mode 100644 index b57bcb162a3419ce9d31f9fcfae1249311448691..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/LEESM_llama-2-7b-hf-lora-oki100p/13881952-9fe3-4308-93d5-912e59465d6e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LEESM_llama-2-7b-hf-lora-oki100p/1762652579.704138", - 
"retrieved_timestamp": "1762652579.704139", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LEESM/llama-2-7b-hf-lora-oki100p", - "developer": "meta", - "inference_platform": "unknown", - "id": "LEESM/llama-2-7b-hf-lora-oki100p" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25129434345314877 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34916752720369776 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3687291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18558843085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/LEESM_llama-2-7b-hf-lora-oki10p/9fb11511-0c66-495a-b634-da6bb0934706.json b/leaderboard_data/HFOpenLLMv2/meta/LEESM_llama-2-7b-hf-lora-oki10p/9fb11511-0c66-495a-b634-da6bb0934706.json deleted file mode 100644 index 66aa649c07d841e329ccb8b280d5c8dd6f6bccc4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/LEESM_llama-2-7b-hf-lora-oki10p/9fb11511-0c66-495a-b634-da6bb0934706.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LEESM_llama-2-7b-hf-lora-oki10p/1762652579.704393", - "retrieved_timestamp": "1762652579.704394", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LEESM/llama-2-7b-hf-lora-oki10p", - "developer": "meta", - "inference_platform": "unknown", - "id": "LEESM/llama-2-7b-hf-lora-oki10p" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22701432199896276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3530929513059229 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34752083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16788563829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/LEESM_llama-3-8b-bnb-4b-kowiki231101/5f540be5-6932-41f4-b588-b88f8cfb89c7.json b/leaderboard_data/HFOpenLLMv2/meta/LEESM_llama-3-8b-bnb-4b-kowiki231101/5f540be5-6932-41f4-b588-b88f8cfb89c7.json deleted file mode 100644 index 8096d7f1ca6dc3e7984e6b458f5d9d2c4febd94f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/LEESM_llama-3-8b-bnb-4b-kowiki231101/5f540be5-6932-41f4-b588-b88f8cfb89c7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LEESM_llama-3-8b-bnb-4b-kowiki231101/1762652579.704602", - "retrieved_timestamp": "1762652579.704603", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LEESM/llama-3-8b-bnb-4b-kowiki231101", - "developer": "meta", - "inference_platform": "unknown", - "id": "LEESM/llama-3-8b-bnb-4b-kowiki231101" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16848739123303944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4130805653617178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { 
- "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3551458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24251994680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/LEESM_llama-3-Korean-Bllossom-8B-trexlab-oki10p/629b8df0-6ce3-4230-baf7-45b3944bf0d5.json b/leaderboard_data/HFOpenLLMv2/meta/LEESM_llama-3-Korean-Bllossom-8B-trexlab-oki10p/629b8df0-6ce3-4230-baf7-45b3944bf0d5.json deleted file mode 100644 index d5123df007b07c648660ca6cff418b0b783c0a80..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/LEESM_llama-3-Korean-Bllossom-8B-trexlab-oki10p/629b8df0-6ce3-4230-baf7-45b3944bf0d5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LEESM_llama-3-Korean-Bllossom-8B-trexlab-oki10p/1762652579.7048151", - "retrieved_timestamp": "1762652579.704816", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p", - "developer": "meta", - "inference_platform": "unknown", - "id": "LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21372513818889433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43430121169320707 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38692708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3176529255319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/LimYeri_CodeMind-Llama3-8B-unsloth_v2-merged/0338e807-8f8e-41d9-b4ac-d80239340678.json b/leaderboard_data/HFOpenLLMv2/meta/LimYeri_CodeMind-Llama3-8B-unsloth_v2-merged/0338e807-8f8e-41d9-b4ac-d80239340678.json deleted file mode 100644 index 4c5448de0b021adb1d4e459d7d186ff51018d0f9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/LimYeri_CodeMind-Llama3-8B-unsloth_v2-merged/0338e807-8f8e-41d9-b4ac-d80239340678.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v2-merged/1762652579.733024", - "retrieved_timestamp": "1762652579.733025", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged", - "developer": "meta", - "inference_platform": "unknown", - "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6946280314011268 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48600920882996324 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3316145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3505651595744681 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/LimYeri_CodeMind-Llama3-8B-unsloth_v3-merged/c96743a9-b5ca-40ab-a86a-ed1c7ab8ddfd.json b/leaderboard_data/HFOpenLLMv2/meta/LimYeri_CodeMind-Llama3-8B-unsloth_v3-merged/c96743a9-b5ca-40ab-a86a-ed1c7ab8ddfd.json deleted file mode 100644 index e92d76201427acb45a470fc0b56e4de4943580bc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/LimYeri_CodeMind-Llama3-8B-unsloth_v3-merged/c96743a9-b5ca-40ab-a86a-ed1c7ab8ddfd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v3-merged/1762652579.733407", - "retrieved_timestamp": "1762652579.7334101", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged", - "developer": "meta", - "inference_platform": "unknown", - "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6762933460994606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4908161460506797 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3356145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34956781914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-merged/0f52efcb-1b9b-4df1-820b-a8c0698481a7.json b/leaderboard_data/HFOpenLLMv2/meta/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-merged/0f52efcb-1b9b-4df1-820b-a8c0698481a7.json deleted file mode 100644 index 8248f8565327fbbe8958792a126ad39dab7ce284..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-merged/0f52efcb-1b9b-4df1-820b-a8c0698481a7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-merged/1762652579.7341938", - "retrieved_timestamp": "1762652579.734195", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged", - "developer": "meta", - "inference_platform": "unknown", - "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32108693821283085 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47387586084568856 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40692708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33527260638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/LimYeri_CodeMind-Llama3.1-8B-unsloth-merged/82d77852-64e4-4dd0-a636-785958786fd2.json b/leaderboard_data/HFOpenLLMv2/meta/LimYeri_CodeMind-Llama3.1-8B-unsloth-merged/82d77852-64e4-4dd0-a636-785958786fd2.json deleted file mode 100644 index 30f65fdb5594080b901c8376414f7da895de3eba..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/LimYeri_CodeMind-Llama3.1-8B-unsloth-merged/82d77852-64e4-4dd0-a636-785958786fd2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3.1-8B-unsloth-merged/1762652579.7344582", - "retrieved_timestamp": "1762652579.734459", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "LimYeri/CodeMind-Llama3.1-8B-unsloth-merged", - "developer": "meta", - "inference_platform": "unknown", - "id": "LimYeri/CodeMind-Llama3.1-8B-unsloth-merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6490157227268093 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4694777854416285 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37523958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33402593085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Locutusque_Hercules-6.0-Llama-3.1-8B/2084dde6-b1e3-457b-9854-ace18cc5d943.json b/leaderboard_data/HFOpenLLMv2/meta/Locutusque_Hercules-6.0-Llama-3.1-8B/2084dde6-b1e3-457b-9854-ace18cc5d943.json deleted file mode 100644 index 279bac2e44acdcda58b34d1b9dfd15d030faa653..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Locutusque_Hercules-6.0-Llama-3.1-8B/2084dde6-b1e3-457b-9854-ace18cc5d943.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Locutusque_Hercules-6.0-Llama-3.1-8B/1762652579.734967", - "retrieved_timestamp": "1762652579.734968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Locutusque/Hercules-6.0-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Locutusque/Hercules-6.0-Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.6630041622893922 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48133037900119535 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.362125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3614527925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Locutusque_Hercules-6.1-Llama-3.1-8B/267ac6ef-168e-489b-a7cc-0ff448b0acbf.json b/leaderboard_data/HFOpenLLMv2/meta/Locutusque_Hercules-6.1-Llama-3.1-8B/267ac6ef-168e-489b-a7cc-0ff448b0acbf.json deleted file mode 100644 index 4643b93526ca30a8f6abe4550f7dcc30bfaa40f3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Locutusque_Hercules-6.1-Llama-3.1-8B/267ac6ef-168e-489b-a7cc-0ff448b0acbf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Locutusque_Hercules-6.1-Llama-3.1-8B/1762652579.735234", - "retrieved_timestamp": "1762652579.735234", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Locutusque/Hercules-6.1-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Locutusque/Hercules-6.1-Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6006806384836678 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46562423765034017 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.17598187311178248 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35533333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36685505319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Locutusque_Llama-3-NeuralHercules-5.0-8B/0c540f58-808b-42fc-b4b9-346367742f70.json b/leaderboard_data/HFOpenLLMv2/meta/Locutusque_Llama-3-NeuralHercules-5.0-8B/0c540f58-808b-42fc-b4b9-346367742f70.json deleted file mode 100644 index 16855044899133edf81cb729a851624c12ec79bf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Locutusque_Llama-3-NeuralHercules-5.0-8B/0c540f58-808b-42fc-b4b9-346367742f70.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Locutusque_Llama-3-NeuralHercules-5.0-8B/1762652579.735453", - "retrieved_timestamp": "1762652579.735453", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Locutusque/Llama-3-NeuralHercules-5.0-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Locutusque/Llama-3-NeuralHercules-5.0-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4489310584803876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3940474241916672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3880729166666667 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29330119680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Locutusque_Llama-3-Yggdrasil-2.0-8B/478f0d4e-41e5-41c7-b9da-07db69c1d561.json b/leaderboard_data/HFOpenLLMv2/meta/Locutusque_Llama-3-Yggdrasil-2.0-8B/478f0d4e-41e5-41c7-b9da-07db69c1d561.json deleted file mode 100644 index 6b67e734ba54bf09c4250260f5192de9cb1150f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Locutusque_Llama-3-Yggdrasil-2.0-8B/478f0d4e-41e5-41c7-b9da-07db69c1d561.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Locutusque_Llama-3-Yggdrasil-2.0-8B/1762652579.7359009", - "retrieved_timestamp": "1762652579.735904", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Locutusque/Llama-3-Yggdrasil-2.0-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Locutusque/Llama-3-Yggdrasil-2.0-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5370583385417359 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47724551424666856 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39765625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.316655585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Lyte_Llama-3.2-3B-Overthinker/d997330d-6679-4d63-839c-677694ea4abc.json 
b/leaderboard_data/HFOpenLLMv2/meta/Lyte_Llama-3.2-3B-Overthinker/d997330d-6679-4d63-839c-677694ea4abc.json deleted file mode 100644 index b0a47abaadb1319c54bf39803d4a014ad6e4d134..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Lyte_Llama-3.2-3B-Overthinker/d997330d-6679-4d63-839c-677694ea4abc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Lyte_Llama-3.2-3B-Overthinker/1762652579.741945", - "retrieved_timestamp": "1762652579.7419462", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Lyte/Llama-3.2-3B-Overthinker", - "developer": "meta", - "inference_platform": "unknown", - "id": "Lyte/Llama-3.2-3B-Overthinker" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6407975283359264 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4320093097952517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34190625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29853723404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/MLP-KTLim_llama-3-Korean-Bllossom-8B/31a37662-052e-440c-a475-66543b6c52b1.json b/leaderboard_data/HFOpenLLMv2/meta/MLP-KTLim_llama-3-Korean-Bllossom-8B/31a37662-052e-440c-a475-66543b6c52b1.json deleted file mode 100644 index b58c8b879b2c782ba0f1dc733239246a4389c0f2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/MLP-KTLim_llama-3-Korean-Bllossom-8B/31a37662-052e-440c-a475-66543b6c52b1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MLP-KTLim_llama-3-Korean-Bllossom-8B/1762652579.7427032", - "retrieved_timestamp": "1762652579.7427042", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MLP-KTLim/llama-3-Korean-Bllossom-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "MLP-KTLim/llama-3-Korean-Bllossom-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5112800702136997 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49004556470187666 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.359375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Magpie-Align_Llama-3-8B-Magpie-Align-v0.1/c819ae59-5f32-4bba-a835-84fa9497de6b.json b/leaderboard_data/HFOpenLLMv2/meta/Magpie-Align_Llama-3-8B-Magpie-Align-v0.1/c819ae59-5f32-4bba-a835-84fa9497de6b.json deleted file mode 100644 index bb7702bf47cac60e78afe778a5fe45dd7817afef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Magpie-Align_Llama-3-8B-Magpie-Align-v0.1/c819ae59-5f32-4bba-a835-84fa9497de6b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-v0.1/1762652579.744125", - "retrieved_timestamp": "1762652579.7441258", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4027192294223771 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47894081019705514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3086979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30011635638297873 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Magpie-Align_Llama-3-8B-Magpie-Align-v0.1/ced5680b-ff4a-42be-a609-6fc2541d6109.json b/leaderboard_data/HFOpenLLMv2/meta/Magpie-Align_Llama-3-8B-Magpie-Align-v0.1/ced5680b-ff4a-42be-a609-6fc2541d6109.json deleted file mode 100644 index 51865012b33888c2b4d4494eda7b929c5a2dd8a1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Magpie-Align_Llama-3-8B-Magpie-Align-v0.1/ced5680b-ff4a-42be-a609-6fc2541d6109.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-v0.1/1762652579.743867", - "retrieved_timestamp": "1762652579.7438679", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4118117705465941 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4811441560714845 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3046979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3006150265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Magpie-Align_Llama-3-8B-Magpie-Align-v0.3/f58be76c-043d-4ad9-81df-9a94d380808c.json b/leaderboard_data/HFOpenLLMv2/meta/Magpie-Align_Llama-3-8B-Magpie-Align-v0.3/f58be76c-043d-4ad9-81df-9a94d380808c.json deleted file mode 100644 index c5d4b21f82aaa168a8536db7daa49a7c42ed0c23..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Magpie-Align_Llama-3-8B-Magpie-Align-v0.3/f58be76c-043d-4ad9-81df-9a94d380808c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-v0.3/1762652579.7443142", - "retrieved_timestamp": "1762652579.744315", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.3", - "developer": "meta", - "inference_platform": "unknown", - "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44970566984490046 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.456960506522001 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { 
- "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31341422872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Magpie-Align_Llama-3.1-8B-Magpie-Align-v0.1/80e08062-397f-40d4-b6b2-a3e03d9cc320.json b/leaderboard_data/HFOpenLLMv2/meta/Magpie-Align_Llama-3.1-8B-Magpie-Align-v0.1/80e08062-397f-40d4-b6b2-a3e03d9cc320.json deleted file mode 100644 index 1a25929c4daee71e293e2fe2f3bac43f99fae67e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Magpie-Align_Llama-3.1-8B-Magpie-Align-v0.1/80e08062-397f-40d4-b6b2-a3e03d9cc320.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3.1-8B-Magpie-Align-v0.1/1762652579.744737", - "retrieved_timestamp": "1762652579.744738", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4457838535086903 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46223963164680143 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31406249999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32621343085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/MagusCorp_grpo_lora_enem_llama3_7b/22c931f2-cf99-46b1-b4f8-50db5a172a66.json b/leaderboard_data/HFOpenLLMv2/meta/MagusCorp_grpo_lora_enem_llama3_7b/22c931f2-cf99-46b1-b4f8-50db5a172a66.json deleted file mode 100644 index c88f7e3842205ac9c286661cce3a4868cfe85492..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/MagusCorp_grpo_lora_enem_llama3_7b/22c931f2-cf99-46b1-b4f8-50db5a172a66.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MagusCorp_grpo_lora_enem_llama3_7b/1762652579.745377", - "retrieved_timestamp": "1762652579.745378", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MagusCorp/grpo_lora_enem_llama3_7b", - "developer": "meta", - "inference_platform": "unknown", - "id": "MagusCorp/grpo_lora_enem_llama3_7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4723622211288271 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48014581980384746 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.397125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35738031914893614 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.1-llama3.1-70b/e216df49-368d-457f-9153-e33741b7b847.json b/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.1-llama3.1-70b/e216df49-368d-457f-9153-e33741b7b847.json deleted file mode 100644 index 0592c13776b9086041e51f2d993b7c118e48a8cf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.1-llama3.1-70b/e216df49-368d-457f-9153-e33741b7b847.json +++ /dev/null @@ -1,107 +0,0 @@ 
-{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-llama3.1-70b/1762652579.751613", - "retrieved_timestamp": "1762652579.7516139", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.1-llama3.1-70b", - "developer": "meta", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.1-llama3.1-70b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8434298771703524 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.644755327496552 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43803125000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5282579787234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.2-llama3-70b/8b86e8c3-eb04-41a8-91e3-3eef396aca4f.json b/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.2-llama3-70b/8b86e8c3-eb04-41a8-91e3-3eef396aca4f.json deleted file mode 100644 index effb498db895f5dc92fef3a1d2ba0a724db4c45c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.2-llama3-70b/8b86e8c3-eb04-41a8-91e3-3eef396aca4f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-llama3-70b/1762652579.753183", - "retrieved_timestamp": "1762652579.753183", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.2-llama3-70b", - 
"developer": "meta", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.2-llama3-70b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8208486814984242 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6435431762417703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2394259818731118 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4445729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206948138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.2-llama3.1-70b/9112c2ec-cf0e-4d2c-9261-14ebb8706d69.json b/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.2-llama3.1-70b/9112c2ec-cf0e-4d2c-9261-14ebb8706d69.json deleted file mode 100644 index 1b7b941b9647a376c4a8a7f141637390c472ff7c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.2-llama3.1-70b/9112c2ec-cf0e-4d2c-9261-14ebb8706d69.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-llama3.1-70b/1762652579.753403", - "retrieved_timestamp": "1762652579.753404", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.2-llama3.1-70b", - "developer": "meta", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.2-llama3.1-70b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8592667455684251 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.6792920009427085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43655589123867067 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45415625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414727393617021 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.3-llama3-70b/66d7e97b-0a79-4d39-8d6b-cf083239aa93.json b/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.3-llama3-70b/66d7e97b-0a79-4d39-8d6b-cf083239aa93.json deleted file mode 100644 index 926d4f6d61f179be0dd7735df27f472fec0423ac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.3-llama3-70b/66d7e97b-0a79-4d39-8d6b-cf083239aa93.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-llama3-70b/1762652579.7547278", - "retrieved_timestamp": "1762652579.7547278", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.3-llama3-70b", - "developer": "meta", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.3-llama3-70b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8010401290797307 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6399173489368603 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42612500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5204454787234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.3-llama3.1-70b/7e8b2abe-68e5-445b-ae22-5b827e53b72d.json b/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.3-llama3.1-70b/7e8b2abe-68e5-445b-ae22-5b827e53b72d.json deleted file mode 100644 index ff85a2394fdc4b8780bd7f1e8740346564beda84..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.3-llama3.1-70b/7e8b2abe-68e5-445b-ae22-5b827e53b72d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-llama3.1-70b/1762652579.755093", - "retrieved_timestamp": "1762652579.7550972", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.3-llama3.1-70b", - "developer": "meta", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.3-llama3.1-70b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8604657863358112 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6871653740091753 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39274924471299094 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45682291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5363198138297872 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.4-llama3-70b/8cf1e62b-f646-4082-9d10-8cf376154d40.json b/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.4-llama3-70b/8cf1e62b-f646-4082-9d10-8cf376154d40.json deleted file mode 100644 index 3729d2e3d3888c054fd4a97325bc5a5ce482aa3b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-2.4-llama3-70b/8cf1e62b-f646-4082-9d10-8cf376154d40.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.4-llama3-70b/1762652579.7565", - "retrieved_timestamp": "1762652579.756501", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.4-llama3-70b", - "developer": "meta", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.4-llama3-70b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5027371817887649 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6418191966839487 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24471299093655588 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4287916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5203623670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-3.1-llamaloi-3b/0acfe83d-3876-4c08-9b26-931450d24bfd.json b/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-3.1-llamaloi-3b/0acfe83d-3876-4c08-9b26-931450d24bfd.json deleted file mode 100644 index fc9429cd5df45e6576d1030da2d32c064873fed3..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/MaziyarPanahi_calme-3.1-llamaloi-3b/0acfe83d-3876-4c08-9b26-931450d24bfd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-llamaloi-3b/1762652579.758682", - "retrieved_timestamp": "1762652579.758683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-3.1-llamaloi-3b", - "developer": "meta", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-3.1-llamaloi-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7375175645066203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4587340004998879 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35152083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3204787234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/MoonRide_Llama-3.2-3B-Khelavaster/ed373700-5ff1-4a84-8746-12ec4c278e00.json b/leaderboard_data/HFOpenLLMv2/meta/MoonRide_Llama-3.2-3B-Khelavaster/ed373700-5ff1-4a84-8746-12ec4c278e00.json deleted file mode 100644 index c1d5e2a60dde820dba126792607c018aee238d18..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/MoonRide_Llama-3.2-3B-Khelavaster/ed373700-5ff1-4a84-8746-12ec4c278e00.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MoonRide_Llama-3.2-3B-Khelavaster/1762652579.762122", - "retrieved_timestamp": "1762652579.762123", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MoonRide/Llama-3.2-3B-Khelavaster", - "developer": "meta", - "inference_platform": "unknown", - "id": "MoonRide/Llama-3.2-3B-Khelavaster" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4924954675815725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45156712929620335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16163141993957703 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36990625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31216755319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/NAPS-ai_naps-llama-3_1_instruct-v0.6.0/3378460d-d044-4c7e-ba9f-48cc94f0bc3f.json b/leaderboard_data/HFOpenLLMv2/meta/NAPS-ai_naps-llama-3_1_instruct-v0.6.0/3378460d-d044-4c7e-ba9f-48cc94f0bc3f.json deleted file mode 100644 index 94de29f65894648d5bcd502d45ae793fc4165732..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/NAPS-ai_naps-llama-3_1_instruct-v0.6.0/3378460d-d044-4c7e-ba9f-48cc94f0bc3f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1_instruct-v0.6.0/1762652579.766795", - "retrieved_timestamp": "1762652579.766796", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NAPS-ai/naps-llama-3_1_instruct-v0.6.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "NAPS-ai/naps-llama-3_1_instruct-v0.6.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3280063564675062 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45284530156109354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37390624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3240525265957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/NAPS-ai_naps-llama3.1-70B-v0.2-fp16/16b6df0d-8e1b-4bec-b3f9-060273a4ad15.json b/leaderboard_data/HFOpenLLMv2/meta/NAPS-ai_naps-llama3.1-70B-v0.2-fp16/16b6df0d-8e1b-4bec-b3f9-060273a4ad15.json deleted file mode 100644 index 50d46df7c28fd1739980b1690ba08e5bcf1b220c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/NAPS-ai_naps-llama3.1-70B-v0.2-fp16/16b6df0d-8e1b-4bec-b3f9-060273a4ad15.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama3.1-70B-v0.2-fp16/1762652579.7671611", - "retrieved_timestamp": "1762652579.767162", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NAPS-ai/naps-llama3.1-70B-v0.2-fp16", - "developer": "meta", - "inference_platform": "unknown", - "id": "NAPS-ai/naps-llama3.1-70B-v0.2-fp16" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1844993506119319 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3040736853180832 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34860416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10987367021276596 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.761 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Naveenpoliasetty_llama3-8B-V2/53ae919d-c56b-415f-87c0-c6273730357b.json b/leaderboard_data/HFOpenLLMv2/meta/Naveenpoliasetty_llama3-8B-V2/53ae919d-c56b-415f-87c0-c6273730357b.json deleted file mode 100644 index e4c70bb91f1642e604005f7e090093b83b5a71f9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Naveenpoliasetty_llama3-8B-V2/53ae919d-c56b-415f-87c0-c6273730357b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Naveenpoliasetty_llama3-8B-V2/1762652579.769772", - "retrieved_timestamp": "1762652579.769773", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Naveenpoliasetty/llama3-8B-V2", - "developer": "meta", - "inference_platform": "unknown", - "id": "Naveenpoliasetty/llama3-8B-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4122616878770551 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5188657580065063 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40813541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3737533244680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nekochu_Llama-3.1-8B-German-ORPO/83da2d8f-542c-4d21-88f9-b83f4e960579.json b/leaderboard_data/HFOpenLLMv2/meta/Nekochu_Llama-3.1-8B-German-ORPO/83da2d8f-542c-4d21-88f9-b83f4e960579.json deleted file mode 100644 index 29f181b9496ff59fab6ee4b6aea42500cd9824ec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nekochu_Llama-3.1-8B-German-ORPO/83da2d8f-542c-4d21-88f9-b83f4e960579.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nekochu_Llama-3.1-8B-German-ORPO/1762652579.7705338", - "retrieved_timestamp": "1762652579.7705338", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nekochu/Llama-3.1-8B-German-ORPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nekochu/Llama-3.1-8B-German-ORPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4610710692074806 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4982577044334462 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46475 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33934507978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Dolphin3.0-Llama3.1-1B-abliterated/ed950058-9f6b-4ed6-9d41-0d2674dc19d1.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Dolphin3.0-Llama3.1-1B-abliterated/ed950058-9f6b-4ed6-9d41-0d2674dc19d1.json deleted file mode 100644 index 
814929ba48acffb0e326d9315080a2ac51bf667d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Dolphin3.0-Llama3.1-1B-abliterated/ed950058-9f6b-4ed6-9d41-0d2674dc19d1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Dolphin3.0-Llama3.1-1B-abliterated/1762652579.772268", - "retrieved_timestamp": "1762652579.772269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5311883580012146 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3240787338568713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32367708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1373005319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DeepDive_3_Prev_v1.0/67010272-067a-4dd4-a31d-9da58d72118e.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DeepDive_3_Prev_v1.0/67010272-067a-4dd4-a31d-9da58d72118e.json deleted file mode 100644 index 6dc9200119ea4625ee74694d2b49e17df7a2fc1c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DeepDive_3_Prev_v1.0/67010272-067a-4dd4-a31d-9da58d72118e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DeepDive_3_Prev_v1.0/1762652579.7727091", - "retrieved_timestamp": "1762652579.7727098", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6809144181881852 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5155095936229447 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1865558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3665833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/9aa57eda-6d6a-449e-801d-96e16499ddd6.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/9aa57eda-6d6a-449e-801d-96e16499ddd6.json deleted file mode 100644 index f9699768058ddabf794f6a03d1044d093e7cc0f0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/9aa57eda-6d6a-449e-801d-96e16499ddd6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0/1762652579.772983", - "retrieved_timestamp": "1762652579.772984", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7100903380807368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.51203649030939 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37576041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34408244680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DobHerWild_R1_v1.1R/bedae6ba-9f3b-435b-bb7f-cadb7a684804.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DobHerWild_R1_v1.1R/bedae6ba-9f3b-435b-bb7f-cadb7a684804.json deleted file mode 100644 index ac3153304634567eb5c002aeb1946d49eafa4bb3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DobHerWild_R1_v1.1R/bedae6ba-9f3b-435b-bb7f-cadb7a684804.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DobHerWild_R1_v1.1R/1762652579.773223", - "retrieved_timestamp": "1762652579.7732239", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.759999024809727 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.525696414662245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23187311178247735 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38521875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36884973404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DoberWild_v2.01/8a3df59d-9f38-4682-a760-5fa7903cab99.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DoberWild_v2.01/8a3df59d-9f38-4682-a760-5fa7903cab99.json deleted file mode 100644 index f752c7f707c130d673c8bf0c8a7b39507fd79802..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DoberWild_v2.01/8a3df59d-9f38-4682-a760-5fa7903cab99.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DoberWild_v2.01/1762652579.7734542", - "retrieved_timestamp": "1762652579.7734542", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DoberWild_v2.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7995662619627034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5250767747736031 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2001510574018127 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4011875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3790724734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DoberWild_v2.02/62ef54cd-d97d-473e-9dd2-42fe185e4d04.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DoberWild_v2.02/62ef54cd-d97d-473e-9dd2-42fe185e4d04.json deleted file mode 100644 index ffc315033ba08b6fe50b26581d6a3d90c2cb9ebf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DoberWild_v2.02/62ef54cd-d97d-473e-9dd2-42fe185e4d04.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DoberWild_v2.02/1762652579.7736902", - "retrieved_timestamp": "1762652579.773691", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DoberWild_v2.02", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.02" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7746368524404137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.531273698652086 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19939577039274925 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39458333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3764128989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DoberWild_v2.03/b81cbefe-7c08-4bc2-979f-10caf20fa9fa.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DoberWild_v2.03/b81cbefe-7c08-4bc2-979f-10caf20fa9fa.json deleted file mode 100644 index e4eef1cff832b031a941946d932bb2091dfda0f6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DoberWild_v2.03/b81cbefe-7c08-4bc2-979f-10caf20fa9fa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DoberWild_v2.03/1762652579.7739289", - "retrieved_timestamp": "1762652579.77393", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DoberWild_v2.03", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.03" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7764354135914928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294434267893284 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20770392749244712 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3045302013422819 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39058333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37217420212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DodoWild_v2.01/78ecc0f4-dcd5-4c25-a598-ef95114f5868.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DodoWild_v2.01/78ecc0f4-dcd5-4c25-a598-ef95114f5868.json deleted file mode 100644 index 45be72b9eb76177693716e3b6318e5013c70e3a5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DodoWild_v2.01/78ecc0f4-dcd5-4c25-a598-ef95114f5868.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.01/1762652579.7741492", - "retrieved_timestamp": "1762652579.7741492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7977677008116243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5252760762748857 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1986404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40896874999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3738364361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DodoWild_v2.02/f8448236-89b9-4a9c-949b-9bb45db5e400.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DodoWild_v2.02/f8448236-89b9-4a9c-949b-9bb45db5e400.json deleted file mode 100644 index 4922080ec3dc9c5269d1847615f494839c658b23..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DodoWild_v2.02/f8448236-89b9-4a9c-949b-9bb45db5e400.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.02/1762652579.774375", - "retrieved_timestamp": "1762652579.774376", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.02", - "developer": "meta", - 
"inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.02" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8016895171478344 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5261737638679802 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22734138972809667 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39706249999999993 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37608045212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DodoWild_v2.03/3b2b7ebc-be82-4d7d-8bc8-e718513d164c.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DodoWild_v2.03/3b2b7ebc-be82-4d7d-8bc8-e718513d164c.json deleted file mode 100644 index a48b37fd89126e55f14562d2c2d02e81a577c095..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DodoWild_v2.03/3b2b7ebc-be82-4d7d-8bc8-e718513d164c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.03/1762652579.7746859", - "retrieved_timestamp": "1762652579.774687", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.03", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.03" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7941207108250552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.530825004382936 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22205438066465258 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3958541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37857380319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DodoWild_v2.10/ca49f981-e4eb-4235-b472-de832ffedd72.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DodoWild_v2.10/ca49f981-e4eb-4235-b472-de832ffedd72.json deleted file mode 100644 index 3ade95fff81809245363cdc4182a6275d99aa7cd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_DodoWild_v2.10/ca49f981-e4eb-4235-b472-de832ffedd72.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_DodoWild_v2.10/1762652579.7749188", - "retrieved_timestamp": "1762652579.7749188", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_DodoWild_v2.10", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.10" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8053863748188141 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5278362703806528 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1971299093655589 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41566666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3854720744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Dolermed_R1_V1.01/ca856917-9100-41ea-9900-91d12be1de44.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Dolermed_R1_V1.01/ca856917-9100-41ea-9900-91d12be1de44.json deleted file mode 100644 index 497378b28c292a3301bf1a134f66051189aa3820..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Dolermed_R1_V1.01/ca856917-9100-41ea-9900-91d12be1de44.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolermed_R1_V1.01/1762652579.775126", - "retrieved_timestamp": "1762652579.775127", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7533544329046928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5312389177563648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20166163141993956 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37470833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3732546542553192 - } - } - 
], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Dolermed_R1_V1.03/b1f9e472-38c5-409f-b112-3006bca90b94.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Dolermed_R1_V1.03/b1f9e472-38c5-409f-b112-3006bca90b94.json deleted file mode 100644 index a0a580be2e6c5bdea2d657ee4f11bf8c79bbd89d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Dolermed_R1_V1.03/b1f9e472-38c5-409f-b112-3006bca90b94.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolermed_R1_V1.03/1762652579.7753332", - "retrieved_timestamp": "1762652579.775334", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7564019025075688 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316448098766001 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20921450151057402 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37200797872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Dolermed_V1.01/4733fd17-2d7a-44cd-83bf-1201a3173495.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Dolermed_V1.01/4733fd17-2d7a-44cd-83bf-1201a3173495.json deleted file mode 100644 index 83b5595ce07ae95fbcedae894a5e318a3c2f193d..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Dolermed_V1.01/4733fd17-2d7a-44cd-83bf-1201a3173495.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolermed_V1.01/1762652579.775538", - "retrieved_timestamp": "1762652579.775538", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Dolermed_V1.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Dolermed_V1.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.508657030013697 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193615033347353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13444108761329304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39448958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3570478723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Dolerstormed_V1.04/9d44d069-44b1-414a-93c1-91b46ceabe66.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Dolerstormed_V1.04/9d44d069-44b1-414a-93c1-91b46ceabe66.json deleted file mode 100644 index 991b2e82271a22f9da5921e73dafab3d5733cf99..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Dolerstormed_V1.04/9d44d069-44b1-414a-93c1-91b46ceabe66.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Dolerstormed_V1.04/1762652579.775745", - "retrieved_timestamp": "1762652579.775746", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7889001183526376 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5195180641442355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221476510067114 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4029583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3888796542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Hermedash_R1_V1.04/615e5bca-6f64-4bf9-a131-eefd7ec32c08.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Hermedash_R1_V1.04/615e5bca-6f64-4bf9-a131-eefd7ec32c08.json deleted file mode 100644 index 2d6fbfe25b35b54f77c511011b83427e708ae233..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Hermedash_R1_V1.04/615e5bca-6f64-4bf9-a131-eefd7ec32c08.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedash_R1_V1.04/1762652579.775957", - "retrieved_timestamp": "1762652579.775958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.7871514248859692 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5191641616026265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1865558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4110520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38821476063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Hermedive_R1_V1.01/82f2d97c-e8d2-47a4-a56b-af781b98ba0b.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Hermedive_R1_V1.01/82f2d97c-e8d2-47a4-a56b-af781b98ba0b.json deleted file mode 100644 index 99a52cb37cf1f0469c2cca805e9c7954b94dd6d3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Hermedive_R1_V1.01/82f2d97c-e8d2-47a4-a56b-af781b98ba0b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedive_R1_V1.01/1762652579.7761788", - "retrieved_timestamp": "1762652579.7761788", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5001141415887622 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5170855986734039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.17749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40084374999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34266954787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Hermedive_R1_V1.03/e73d5aee-ad0f-4bec-8230-2087669444bb.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Hermedive_R1_V1.03/e73d5aee-ad0f-4bec-8230-2087669444bb.json deleted file mode 100644 index e1c5bee6e5af8bb4955d792378c3438837035bca..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Hermedive_R1_V1.03/e73d5aee-ad0f-4bec-8230-2087669444bb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedive_R1_V1.03/1762652579.776387", - "retrieved_timestamp": "1762652579.7763882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6647528557560606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5140787918844759 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3613125 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3488198138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Hermedive_V1.01/99589a08-8f1e-437e-b6f0-e33a9dab5806.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Hermedive_V1.01/99589a08-8f1e-437e-b6f0-e33a9dab5806.json deleted file mode 100644 index c594c73605637db73a04fb381ecf1a30f6a4ba79..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Hermedive_V1.01/99589a08-8f1e-437e-b6f0-e33a9dab5806.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Hermedive_V1.01/1762652579.776601", - "retrieved_timestamp": "1762652579.776602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Hermedive_V1.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Hermedive_V1.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5061592131101034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4918197968512548 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36965624999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3550531914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Mediver_V1.01/35eb03f0-f11e-40d8-a830-7ce2cfde2956.json 
b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Mediver_V1.01/35eb03f0-f11e-40d8-a830-7ce2cfde2956.json deleted file mode 100644 index e47707318e89b3401a062d95329035995b6bccf9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Mediver_V1.01/35eb03f0-f11e-40d8-a830-7ce2cfde2956.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Mediver_V1.01/1762652579.7768", - "retrieved_timestamp": "1762652579.776801", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Mediver_V1.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Mediver_V1.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18847103463255274 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44148325896745977 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38978124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2993683510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Medusa_v1.01/01b841ba-ecb1-4025-91b7-fb2c443ef85c.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Medusa_v1.01/01b841ba-ecb1-4025-91b7-fb2c443ef85c.json deleted file mode 100644 index 9cb73987d5e50b34566cda999fbf7cf1ca2e1a9d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Medusa_v1.01/01b841ba-ecb1-4025-91b7-fb2c443ef85c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Medusa_v1.01/1762652579.777005", - "retrieved_timestamp": "1762652579.7770061", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Medusa_v1.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Medusa_v1.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7685419132346618 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5017727187674992 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40667708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3531416223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Smarteaz_0.2_R1/1cbff8d9-a857-4816-8427-0450871021d6.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Smarteaz_0.2_R1/1cbff8d9-a857-4816-8427-0450871021d6.json deleted file mode 100644 index 7268118291ec3672bf1d03580f825ae88f782597..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Smarteaz_0.2_R1/1cbff8d9-a857-4816-8427-0450871021d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Smarteaz_0.2_R1/1762652579.777212", - "retrieved_timestamp": "1762652579.777212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6345529860769425 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5112504828088763 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26057401812688824 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4188020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3645279255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Smarteaz_V1.01/10cc1ce1-986e-44f5-b14e-a7b44d9de68d.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Smarteaz_V1.01/10cc1ce1-986e-44f5-b14e-a7b44d9de68d.json deleted file mode 100644 index 8d9d801cb7caf8732817dca272c6b4b94c076938..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Smarteaz_V1.01/10cc1ce1-986e-44f5-b14e-a7b44d9de68d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Smarteaz_V1.01/1762652579.777418", - "retrieved_timestamp": "1762652579.777418", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Smarteaz_V1.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Smarteaz_V1.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8151283040111349 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241273021389002 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": 
"Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37892708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3735871010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Stormeder_v1.04/e831c8bd-5bdd-4f00-9c91-ab4b29dfc66c.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Stormeder_v1.04/e831c8bd-5bdd-4f00-9c91-ab4b29dfc66c.json deleted file mode 100644 index a7802c933cec131f889769b5557dc72467aaaee1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Stormeder_v1.04/e831c8bd-5bdd-4f00-9c91-ab4b29dfc66c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Stormeder_v1.04/1762652579.777617", - "retrieved_timestamp": "1762652579.777618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Stormeder_v1.04", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Stormeder_v1.04" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7852531283660686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5207086605445487 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18504531722054382 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3948958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38522273936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Typhoon_v1.03/6043c193-a533-4194-8cf5-9ed83d095f0d.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Typhoon_v1.03/6043c193-a533-4194-8cf5-9ed83d095f0d.json deleted file mode 100644 index 7a04ae8b71babd1a75dc46a888c91910b701e5ad..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.1_8b_Typhoon_v1.03/6043c193-a533-4194-8cf5-9ed83d095f0d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.1_8b_Typhoon_v1.03/1762652579.7778199", - "retrieved_timestamp": "1762652579.7778208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.1_8b_Typhoon_v1.03", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.1_8b_Typhoon_v1.03" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8078343240379969 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5313965802672672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22734138972809667 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38146875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842253989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff 
--git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_AquaSyn_0.1/4b512748-f6d0-4ed0-8ece-5b853a174329.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_AquaSyn_0.1/4b512748-f6d0-4ed0-8ece-5b853a174329.json deleted file mode 100644 index 05338fceb13b69884889a988bf9ed95678a0e727..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_AquaSyn_0.1/4b512748-f6d0-4ed0-8ece-5b853a174329.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_AquaSyn_0.1/1762652579.7780669", - "retrieved_timestamp": "1762652579.778068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_AquaSyn_0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_AquaSyn_0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2741004977903075 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3284363786988483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1377992021276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_AquaSyn_0.11/d3e57fb7-44cb-408a-9ed6-6387b1f0a543.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_AquaSyn_0.11/d3e57fb7-44cb-408a-9ed6-6387b1f0a543.json deleted file mode 100644 index ae88e808ebe1ed248071167c352a0a5264f77d48..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_AquaSyn_0.11/d3e57fb7-44cb-408a-9ed6-6387b1f0a543.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Nexesenex_Llama_3.2_1b_AquaSyn_0.11/1762652579.778271", - "retrieved_timestamp": "1762652579.778271", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_AquaSyn_0.11", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_AquaSyn_0.11" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24312601674667658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3111956727868642 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3367604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1116190159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Dolto_0.1/dae3d027-e262-462c-9930-cfee221cef58.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Dolto_0.1/dae3d027-e262-462c-9930-cfee221cef58.json deleted file mode 100644 index 03ff977ccf0fb142b080d386ed44026ecca12a1b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Dolto_0.1/dae3d027-e262-462c-9930-cfee221cef58.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Dolto_0.1/1762652579.778476", - "retrieved_timestamp": "1762652579.778477", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_Dolto_0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": 
"Nexesenex/Llama_3.2_1b_Dolto_0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5433782364127182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3350056502150862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23741610738255034 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.342125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13638630319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Odyssea_V1.01/f3922129-7e69-495d-925b-c3c8a1b70c5a.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Odyssea_V1.01/f3922129-7e69-495d-925b-c3c8a1b70c5a.json deleted file mode 100644 index ef4b1d6119143958845648904d73faa0026ca99a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Odyssea_V1.01/f3922129-7e69-495d-925b-c3c8a1b70c5a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Odyssea_V1.01/1762652579.778893", - "retrieved_timestamp": "1762652579.7788942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_Odyssea_V1.01", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_Odyssea_V1.01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24954564998648032 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3044651612138552 - } - }, 
- { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11519281914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Odyssea_V1/deb8be23-8976-4dfb-b038-70a4b77de9f6.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Odyssea_V1/deb8be23-8976-4dfb-b038-70a4b77de9f6.json deleted file mode 100644 index 5525bb3224a33aff440142810467f75b0a3fd422..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Odyssea_V1/deb8be23-8976-4dfb-b038-70a4b77de9f6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Odyssea_V1/1762652579.77868", - "retrieved_timestamp": "1762652579.77868", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_Odyssea_V1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_Odyssea_V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2552660274737696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3009715832098017 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33936458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11527593085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_OpenTree_R1_0.1/11c52cd6-75e0-4800-9b98-fbc4aa81260d.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_OpenTree_R1_0.1/11c52cd6-75e0-4800-9b98-fbc4aa81260d.json deleted file mode 100644 index 1bf49da83b4bccb3ba22d57ee87c28961708b573..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_OpenTree_R1_0.1/11c52cd6-75e0-4800-9b98-fbc4aa81260d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_OpenTree_R1_0.1/1762652579.779097", - "retrieved_timestamp": "1762652579.779098", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5366339091388627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3279521771600605 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31307291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16747007978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", 
- "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_OrcaSun_V1/dd17eeb9-c1d1-4f98-986e-aad15a592891.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_OrcaSun_V1/dd17eeb9-c1d1-4f98-986e-aad15a592891.json deleted file mode 100644 index f8d893dcba0d20841f4bef1030343b60f133a1da..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_OrcaSun_V1/dd17eeb9-c1d1-4f98-986e-aad15a592891.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_OrcaSun_V1/1762652579.779477", - "retrieved_timestamp": "1762652579.779478", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_OrcaSun_V1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_OrcaSun_V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5948605256275571 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.355031362479927 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23657718120805368 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33803125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19040890957446807 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_RandomLego_RP_R1_0.1/8254ed33-9ce6-484d-9171-5402156a1933.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_RandomLego_RP_R1_0.1/8254ed33-9ce6-484d-9171-5402156a1933.json deleted file mode 100644 index abc8104cd0df929932dfff8c233b04d3f2b14b4d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_RandomLego_RP_R1_0.1/8254ed33-9ce6-484d-9171-5402156a1933.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_RandomLego_RP_R1_0.1/1762652579.779787", - "retrieved_timestamp": "1762652579.779788", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5542693386880144 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34277067367168224 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3249166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15633311170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_SunOrca_V1/848752ff-c92d-4ce2-94e8-5b8c8b765b77.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_SunOrca_V1/848752ff-c92d-4ce2-94e8-5b8c8b765b77.json deleted file mode 100644 index 9b49b0154314e92f4f5ba5a7830a96fe73dc3aa0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_SunOrca_V1/848752ff-c92d-4ce2-94e8-5b8c8b765b77.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_SunOrca_V1/1762652579.7800052", - "retrieved_timestamp": "1762652579.780006", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_SunOrca_V1", - 
"developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_SunOrca_V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.542953807009845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34306447662530104 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06722054380664652 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18841422872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Sydonia_0.1/980cf18c-0163-414c-8ed0-dff894a328ee.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Sydonia_0.1/980cf18c-0163-414c-8ed0-dff894a328ee.json deleted file mode 100644 index a917830e58d775eb0d63a0fdc944ea0112529648..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Sydonia_0.1/980cf18c-0163-414c-8ed0-dff894a328ee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Sydonia_0.1/1762652579.780214", - "retrieved_timestamp": "1762652579.780215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_Sydonia_0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_Sydonia_0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21967047434141412 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.31210928710549807 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22818791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33818750000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12242353723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Syneridol_0.2/99397e12-f601-478c-af40-c8f428b923a8.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Syneridol_0.2/99397e12-f601-478c-af40-c8f428b923a8.json deleted file mode 100644 index 0f593eea08434058e6c0f57047f997f53e6e4b95..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Syneridol_0.2/99397e12-f601-478c-af40-c8f428b923a8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Syneridol_0.2/1762652579.780447", - "retrieved_timestamp": "1762652579.780447", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_Syneridol_0.2", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_Syneridol_0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21574865800520399 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3138849872298115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33428125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12267287234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Synopsys_0.1/00ccf406-3e59-44cb-af59-6dcd391678ff.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Synopsys_0.1/00ccf406-3e59-44cb-af59-6dcd391678ff.json deleted file mode 100644 index dc7610149d98564a2307b9a331d821f9d1588320..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Synopsys_0.1/00ccf406-3e59-44cb-af59-6dcd391678ff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Synopsys_0.1/1762652579.780673", - "retrieved_timestamp": "1762652579.780674", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_Synopsys_0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_Synopsys_0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17638089158987041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31619439082949846 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34609375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12308843085106383 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Synopsys_0.11/6e4a0c11-2349-4846-9d9b-ccf6ef9ea43a.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Synopsys_0.11/6e4a0c11-2349-4846-9d9b-ccf6ef9ea43a.json deleted file mode 100644 index 89d82609aca0f10a176f43bb5512aa34bc203525..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_1b_Synopsys_0.11/6e4a0c11-2349-4846-9d9b-ccf6ef9ea43a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_1b_Synopsys_0.11/1762652579.780885", - "retrieved_timestamp": "1762652579.780886", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_1b_Synopsys_0.11", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_1b_Synopsys_0.11" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28421698870109086 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31019707628668325 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35133333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_3b_Kermes_v1/f81acd72-b38a-424a-878b-833d094518da.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_3b_Kermes_v1/f81acd72-b38a-424a-878b-833d094518da.json deleted file mode 100644 index fdca1f99c4e4f5d1fcbe9c8036277354d58a5ddf..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_3b_Kermes_v1/f81acd72-b38a-424a-878b-833d094518da.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_3b_Kermes_v1/1762652579.781107", - "retrieved_timestamp": "1762652579.781108", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_3b_Kermes_v1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_3b_Kermes_v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4851759996808468 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4409910297279671 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40702083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2547373670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_3b_Kermes_v2.1/f4686eff-f1d7-49e0-85be-2a6c7f125e29.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_3b_Kermes_v2.1/f4686eff-f1d7-49e0-85be-2a6c7f125e29.json deleted file mode 100644 index c99930ca9bed687305c3df012e36365869fbba4c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_3b_Kermes_v2.1/f4686eff-f1d7-49e0-85be-2a6c7f125e29.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_3b_Kermes_v2.1/1762652579.781543", - "retrieved_timestamp": "1762652579.781544", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_3b_Kermes_v2.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_3b_Kermes_v2.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5583906257618674 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44638999626044323 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3963541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26919880319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_3b_Kermes_v2/a3d85774-ddac-436f-9c64-a751d2924bb5.json b/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_3b_Kermes_v2/a3d85774-ddac-436f-9c64-a751d2924bb5.json deleted file mode 100644 index 4f5f06a9db380bc691d5cb285287fd52a1949ce2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Nexesenex_Llama_3.2_3b_Kermes_v2/a3d85774-ddac-436f-9c64-a751d2924bb5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Nexesenex_Llama_3.2_3b_Kermes_v2/1762652579.781325", - "retrieved_timestamp": "1762652579.781326", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Nexesenex/Llama_3.2_3b_Kermes_v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "Nexesenex/Llama_3.2_3b_Kermes_v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5753766672429155 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44554539692939316 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37781249999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2734375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/NotASI_FineTome-Llama3.2-1B-0929/2346a7eb-2148-49f3-b960-363ba6b776d4.json b/leaderboard_data/HFOpenLLMv2/meta/NotASI_FineTome-Llama3.2-1B-0929/2346a7eb-2148-49f3-b960-363ba6b776d4.json deleted file mode 100644 index f43f58516432fff5d2cd07c4d1f51a8a7593b886..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/NotASI_FineTome-Llama3.2-1B-0929/2346a7eb-2148-49f3-b960-363ba6b776d4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NotASI_FineTome-Llama3.2-1B-0929/1762652579.788707", - "retrieved_timestamp": "1762652579.7887082", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NotASI/FineTome-Llama3.2-1B-0929", - "developer": "meta", - "inference_platform": "unknown", - "id": "NotASI/FineTome-Llama3.2-1B-0929" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39907223943580805 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3246274874705644 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3487604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1428690159574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/NotASI_FineTome-Llama3.2-3B-1002/e701f5dc-d604-4bbb-8e92-37d69781ae5f.json b/leaderboard_data/HFOpenLLMv2/meta/NotASI_FineTome-Llama3.2-3B-1002/e701f5dc-d604-4bbb-8e92-37d69781ae5f.json deleted file mode 100644 index f1b4f719f289c96e5c80a07fa9c8959e0aff05bf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/NotASI_FineTome-Llama3.2-3B-1002/e701f5dc-d604-4bbb-8e92-37d69781ae5f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NotASI_FineTome-Llama3.2-3B-1002/1762652579.788946", - "retrieved_timestamp": "1762652579.7889469", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NotASI/FineTome-Llama3.2-3B-1002", - "developer": "meta", - "inference_platform": "unknown", - "id": "NotASI/FineTome-Llama3.2-3B-1002" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5474496558021605 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4319470614025341 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3685104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.24368351063829788 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/NotASI_FineTome-v1.5-Llama3.2-1B-1007/8c67c634-82f0-4bb8-bd70-e98902649d96.json b/leaderboard_data/HFOpenLLMv2/meta/NotASI_FineTome-v1.5-Llama3.2-1B-1007/8c67c634-82f0-4bb8-bd70-e98902649d96.json deleted file mode 100644 index 82f2ac00aa59c9109c5545da7c31c06e8fbd535d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/NotASI_FineTome-v1.5-Llama3.2-1B-1007/8c67c634-82f0-4bb8-bd70-e98902649d96.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NotASI_FineTome-v1.5-Llama3.2-1B-1007/1762652579.789186", - "retrieved_timestamp": "1762652579.789187", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NotASI/FineTome-v1.5-Llama3.2-1B-1007", - "developer": "meta", - "inference_platform": "unknown", - "id": "NotASI/FineTome-v1.5-Llama3.2-1B-1007" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39237777984636324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32405671121485663 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34745833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1427027925531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/NotASI_FineTome-v1.5-Llama3.2-3B-1007/d8a359e5-2899-4d3f-9fb4-3120f61951f4.json b/leaderboard_data/HFOpenLLMv2/meta/NotASI_FineTome-v1.5-Llama3.2-3B-1007/d8a359e5-2899-4d3f-9fb4-3120f61951f4.json deleted file mode 100644 index ed8bc632fe54e06ce6ebddaf634899d7f80aa2dd..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/NotASI_FineTome-v1.5-Llama3.2-3B-1007/d8a359e5-2899-4d3f-9fb4-3120f61951f4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NotASI_FineTome-v1.5-Llama3.2-3B-1007/1762652579.789401", - "retrieved_timestamp": "1762652579.789401", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NotASI/FineTome-v1.5-Llama3.2-3B-1007", - "developer": "meta", - "inference_platform": "unknown", - "id": "NotASI/FineTome-v1.5-Llama3.2-3B-1007" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5507719517546776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4312372935321582 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3645416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2448470744680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Hermes-2-Pro-Llama-3-8B/af47ca72-b9b5-4cf3-84a7-e2f4602e6eaa.json b/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Hermes-2-Pro-Llama-3-8B/af47ca72-b9b5-4cf3-84a7-e2f4602e6eaa.json deleted file mode 100644 index b101723e2545116eae2128dd4be3481c238d7938..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Hermes-2-Pro-Llama-3-8B/af47ca72-b9b5-4cf3-84a7-e2f4602e6eaa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-2-Pro-Llama-3-8B/1762652579.78989", - "retrieved_timestamp": "1762652579.789891", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Hermes-2-Pro-Llama-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "NousResearch/Hermes-2-Pro-Llama-3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5361839918084017 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.507112624310082 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4262395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30518617021276595 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Hermes-2-Theta-Llama-3-8B/99c4b14f-8ea6-4f6e-af65-1e2ee58eeca9.json b/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Hermes-2-Theta-Llama-3-8B/99c4b14f-8ea6-4f6e-af65-1e2ee58eeca9.json deleted file mode 100644 index 08f5298dd464ccbfbafd59d46a186bb4faaf72b0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Hermes-2-Theta-Llama-3-8B/99c4b14f-8ea6-4f6e-af65-1e2ee58eeca9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-2-Theta-Llama-3-8B/1762652579.79036", - "retrieved_timestamp": "1762652579.79036", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Hermes-2-Theta-Llama-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "NousResearch/Hermes-2-Theta-Llama-3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6517883659800441 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5206672260911865 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3948958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33685172872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Hermes-3-Llama-3.1-70B/e48bd1d8-1082-4b79-8145-87d7f013fb82.json b/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Hermes-3-Llama-3.1-70B/e48bd1d8-1082-4b79-8145-87d7f013fb82.json deleted file mode 100644 index 4db7f02d46bf18b3c943c7621ff1e33ec4a63db2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Hermes-3-Llama-3.1-70B/e48bd1d8-1082-4b79-8145-87d7f013fb82.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-3-Llama-3.1-70B/1762652579.7905731", - "retrieved_timestamp": "1762652579.7905731", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Hermes-3-Llama-3.1-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "NousResearch/Hermes-3-Llama-3.1-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7661438316998896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6755780641387483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20996978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { 
- "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615771812080537 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4948958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47265625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Hermes-3-Llama-3.1-8B/b9300d76-c854-48a2-a900-b661c1fae7bf.json b/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Hermes-3-Llama-3.1-8B/b9300d76-c854-48a2-a900-b661c1fae7bf.json deleted file mode 100644 index b449c8011445f549e36e68a2cf846677cec7c4c4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Hermes-3-Llama-3.1-8B/b9300d76-c854-48a2-a900-b661c1fae7bf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-3-Llama-3.1-8B/1762652579.790786", - "retrieved_timestamp": "1762652579.790787", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Hermes-3-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "NousResearch/Hermes-3-Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6170172918966121 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5177452540141246 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4369375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3139128989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Hermes-3-Llama-3.2-3B/7e5f7bc1-1f9a-497a-a903-7d612bb923ca.json b/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Hermes-3-Llama-3.2-3B/7e5f7bc1-1f9a-497a-a903-7d612bb923ca.json deleted file mode 100644 index ffcd5453e5287e0fb26197bf326289537d5a6878..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Hermes-3-Llama-3.2-3B/7e5f7bc1-1f9a-497a-a903-7d612bb923ca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-3-Llama-3.2-3B/1762652579.790994", - "retrieved_timestamp": "1762652579.790995", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Hermes-3-Llama-3.2-3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "NousResearch/Hermes-3-Llama-3.2-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3824862476008103 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43519901506714875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40302083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25440492021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Nous-Hermes-llama-2-7b/6ab36d53-da10-4f80-bd1b-dc037a020362.json b/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Nous-Hermes-llama-2-7b/6ab36d53-da10-4f80-bd1b-dc037a020362.json deleted file mode 100644 index 
a38c19f19ad65acd318278395acb65305c47d19f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Nous-Hermes-llama-2-7b/6ab36d53-da10-4f80-bd1b-dc037a020362.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-llama-2-7b/1762652579.792065", - "retrieved_timestamp": "1762652579.792066", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Nous-Hermes-llama-2-7b", - "developer": "meta", - "inference_platform": "unknown", - "id": "NousResearch/Nous-Hermes-llama-2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17290788441335658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3823937686034717 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42571875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19398271276595744 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Yarn-Llama-2-13b-128k/e067537a-a621-483f-b1cf-ee78f57a39da.json b/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Yarn-Llama-2-13b-128k/e067537a-a621-483f-b1cf-ee78f57a39da.json deleted file mode 100644 index 879f34c22050de50ac77221b981e399226095783..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Yarn-Llama-2-13b-128k/e067537a-a621-483f-b1cf-ee78f57a39da.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Llama-2-13b-128k/1762652579.792277", - "retrieved_timestamp": "1762652579.792278", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Yarn-Llama-2-13b-128k", - "developer": "meta", - "inference_platform": "unknown", - "id": "NousResearch/Yarn-Llama-2-13b-128k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16546430138698653 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3826816443733663 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23204787234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Yarn-Llama-2-7b-128k/e3e717a5-a987-4e94-a528-9aafadb6774f.json b/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Yarn-Llama-2-7b-128k/e3e717a5-a987-4e94-a528-9aafadb6774f.json deleted file mode 100644 index 84cd94a88fce38ad87cedd84ef63594ee636082f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Yarn-Llama-2-7b-128k/e3e717a5-a987-4e94-a528-9aafadb6774f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Llama-2-7b-128k/1762652579.792481", - "retrieved_timestamp": "1762652579.7924821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Yarn-Llama-2-7b-128k", - "developer": "meta", - "inference_platform": "unknown", - "id": "NousResearch/Yarn-Llama-2-7b-128k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.14847825990593846 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32480295375597734 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39669791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1791057180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Yarn-Llama-2-7b-64k/50db2b1d-e0b5-43b1-86e2-5fa55fb3a960.json b/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Yarn-Llama-2-7b-64k/50db2b1d-e0b5-43b1-86e2-5fa55fb3a960.json deleted file mode 100644 index a591d4b0e1f783ea649b44a681db83540c8b8c8d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/NousResearch_Yarn-Llama-2-7b-64k/50db2b1d-e0b5-43b1-86e2-5fa55fb3a960.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Llama-2-7b-64k/1762652579.7927492", - "retrieved_timestamp": "1762652579.792753", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Yarn-Llama-2-7b-64k", - "developer": "meta", - "inference_platform": "unknown", - "id": "NousResearch/Yarn-Llama-2-7b-64k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1699856381068897 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3326277865253592 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.393875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17985372340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/OEvortex_Emotional-llama-8B/c2593003-ca2a-4699-8473-a07683e7cd85.json b/leaderboard_data/HFOpenLLMv2/meta/OEvortex_Emotional-llama-8B/c2593003-ca2a-4699-8473-a07683e7cd85.json deleted file mode 100644 index 0a200b564e6ef39374ed08dbee314297fac4070c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/OEvortex_Emotional-llama-8B/c2593003-ca2a-4699-8473-a07683e7cd85.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OEvortex_Emotional-llama-8B/1762652579.797152", - "retrieved_timestamp": "1762652579.797153", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OEvortex/Emotional-llama-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "OEvortex/Emotional-llama-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3516369898535885 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4838573702054177 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.365875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35347406914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3-70b-v21.2-32k/3d49db5c-bcd1-4d2f-9616-c551a53bdebe.json b/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3-70b-v21.2-32k/3d49db5c-bcd1-4d2f-9616-c551a53bdebe.json deleted file mode 100644 index 2051ecccdedce5f524ff9ac20f23f727c4612c3f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3-70b-v21.2-32k/3d49db5c-bcd1-4d2f-9616-c551a53bdebe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3-70b-v21.2-32k/1762652579.8002949", - "retrieved_timestamp": "1762652579.8002958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-llama3-70b-v21.2-32k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-llama3-70b-v21.2-32k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7010476646409305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6507443429944494 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45796875000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4832114361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3-8b-v21.1-8k/2a86c8f6-2aed-4e0c-ad8a-e9ff5065a1e4.json b/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3-8b-v21.1-8k/2a86c8f6-2aed-4e0c-ad8a-e9ff5065a1e4.json deleted file mode 100644 index 
7d1b2abba48cbe4f59ecb22eda6fba32f584df43..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3-8b-v21.1-8k/2a86c8f6-2aed-4e0c-ad8a-e9ff5065a1e4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3-8b-v21.1-8k/1762652579.800596", - "retrieved_timestamp": "1762652579.800596", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-llama3-8b-v21.1-8k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-llama3-8b-v21.1-8k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5569666263292509 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47875007373484046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3987708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2954621010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3-8b-v21.2-32k/960fabe4-5395-4d3f-9680-65fe0b8655ac.json b/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3-8b-v21.2-32k/960fabe4-5395-4d3f-9680-65fe0b8655ac.json deleted file mode 100644 index f522aa05337e743da1fc92b3ca053b8b5e8e0bf6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3-8b-v21.2-32k/960fabe4-5395-4d3f-9680-65fe0b8655ac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3-8b-v21.2-32k/1762652579.800807", - "retrieved_timestamp": "1762652579.800808", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-llama3-8b-v21.2-32k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-llama3-8b-v21.2-32k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6191904147661538 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4856219845879779 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.377875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3298703457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.1-70b-v22.1-131k/77d10b46-e3cf-42a0-b215-f9f8ff5ef60d.json b/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.1-70b-v22.1-131k/77d10b46-e3cf-42a0-b215-f9f8ff5ef60d.json deleted file mode 100644 index ae4212b022eef8720c92f31f68b66d2383a772e1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.1-70b-v22.1-131k/77d10b46-e3cf-42a0-b215-f9f8ff5ef60d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.1-70b-v22.1-131k/1762652579.801551", - "retrieved_timestamp": "1762652579.801553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7332710541363582 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6698491606025763 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3950151057401813 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46295833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5304188829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.1-8b-v22.2-131k/b57cd648-1503-4bbf-81d7-4ca72ac9ff27.json b/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.1-8b-v22.2-131k/b57cd648-1503-4bbf-81d7-4ca72ac9ff27.json deleted file mode 100644 index 6627c2b43a2f0a3e37037865f73e16de74ba18f8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.1-8b-v22.2-131k/b57cd648-1503-4bbf-81d7-4ca72ac9ff27.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.1-8b-v22.2-131k/1762652579.801888", - "retrieved_timestamp": "1762652579.801889", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6657269378582162 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5006515954024578 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40810416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3310339095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.1-8b-v22.3-131k/7abaa7f8-8378-496c-b5f8-ac9046eeccc8.json b/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.1-8b-v22.3-131k/7abaa7f8-8378-496c-b5f8-ac9046eeccc8.json deleted file mode 100644 index dfca23efafe40e5e636329d36591be46722d33a6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.1-8b-v22.3-131k/7abaa7f8-8378-496c-b5f8-ac9046eeccc8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.1-8b-v22.3-131k/1762652579.8021362", - "retrieved_timestamp": "1762652579.802138", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5997065563815123 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5065914870348772 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40146875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3277094414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.2-1b-v23.1-131k/85379044-198d-4fb5-82c8-50857f8d65d0.json b/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.2-1b-v23.1-131k/85379044-198d-4fb5-82c8-50857f8d65d0.json deleted file mode 100644 index 1d9948a357d1e2e7e93283c50c062e20fa2483be..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.2-1b-v23.1-131k/85379044-198d-4fb5-82c8-50857f8d65d0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.2-1b-v23.1-131k/1762652579.802413", - "retrieved_timestamp": "1762652579.8024142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3590052172679601 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3266563226631131 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33421875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1840093085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.2-3b-v23.2-131k/6d6e86f6-f1b7-42ef-9581-b0542e6e12ef.json b/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.2-3b-v23.2-131k/6d6e86f6-f1b7-42ef-9581-b0542e6e12ef.json deleted file mode 100644 index a241049d49824c661dd750c1720a70c81dc6399b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.2-3b-v23.2-131k/6d6e86f6-f1b7-42ef-9581-b0542e6e12ef.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.2-3b-v23.2-131k/1762652579.802651", - "retrieved_timestamp": "1762652579.802652", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4319450169993395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4072660342069299 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2479222074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.3-70b-v24.1-131k/49768a60-0b77-4945-a048-013a6fb719ca.json b/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.3-70b-v24.1-131k/49768a60-0b77-4945-a048-013a6fb719ca.json deleted file mode 100644 index 7555a2a1a6368fe1dc59669f5200c50d62cb1616..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-llama3.3-70b-v24.1-131k/49768a60-0b77-4945-a048-013a6fb719ca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-llama3.3-70b-v24.1-131k/1762652579.802965", - "retrieved_timestamp": "1762652579.8029802", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.812080834408259 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6858038620320306 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44108761329305135 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43456375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4869270833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5327460106382979 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-qwen2.5llamaify-14b-v23.1-200k/489b8b24-4295-41b3-b286-14f79972fe93.json b/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-qwen2.5llamaify-14b-v23.1-200k/489b8b24-4295-41b3-b286-14f79972fe93.json deleted file mode 100644 index ebce075eef31fe08e2684ad3b41fc5447e550172..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-qwen2.5llamaify-14b-v23.1-200k/489b8b24-4295-41b3-b286-14f79972fe93.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwen2.5llamaify-14b-v23.1-200k/1762652579.804163", - "retrieved_timestamp": "1762652579.8041642", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - 
"name": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.630880508162786 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.601319898776811 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2537764350453172 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42404166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4673371010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-qwen2.5llamaify-14b-v23.3-200k/ce4e7736-51d8-431a-9bef-ac2bcb3ff0fe.json b/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-qwen2.5llamaify-14b-v23.3-200k/ce4e7736-51d8-431a-9bef-ac2bcb3ff0fe.json deleted file mode 100644 index e825c433e46abe8f1b70e4b16d79f5ca4d2d9002..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-qwen2.5llamaify-14b-v23.3-200k/ce4e7736-51d8-431a-9bef-ac2bcb3ff0fe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwen2.5llamaify-14b-v23.3-200k/1762652579.8044102", - "retrieved_timestamp": "1762652579.804411", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6131453432448126 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6080855261046028 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4345833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4794714095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-qwen2.5llamaify-7b-v23.1-200k/d5f3ca22-b682-47c6-a7ba-93b401cb8c8f.json b/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-qwen2.5llamaify-7b-v23.1-200k/d5f3ca22-b682-47c6-a7ba-93b401cb8c8f.json deleted file mode 100644 index e759d0b430b8459c10fdf65900cd079a045915f6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/OpenBuddy_openbuddy-qwen2.5llamaify-7b-v23.1-200k/d5f3ca22-b682-47c6-a7ba-93b401cb8c8f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwen2.5llamaify-7b-v23.1-200k/1762652579.804652", - "retrieved_timestamp": "1762652579.8046532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5672582082208539 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5509381466888461 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43632291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.394780585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.615 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/OpenLeecher_llama3-8b-lima/b482d6e6-8520-4a77-a729-ebe2e9635a6c.json b/leaderboard_data/HFOpenLLMv2/meta/OpenLeecher_llama3-8b-lima/b482d6e6-8520-4a77-a729-ebe2e9635a6c.json deleted file mode 100644 index ffc17ba5303a15a5c718792665b4dec52090e6c5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/OpenLeecher_llama3-8b-lima/b482d6e6-8520-4a77-a729-ebe2e9635a6c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenLeecher_llama3-8b-lima/1762652579.807648", - "retrieved_timestamp": "1762652579.8076491", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenLeecher/llama3-8b-lima", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenLeecher/llama3-8b-lima" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43706587410293574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4295828632822993 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23825503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37127083333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26263297872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/OpenScholar_Llama-3.1_OpenScholar-8B/1e6ea564-30ff-4db3-8bb6-070da34e3fb5.json b/leaderboard_data/HFOpenLLMv2/meta/OpenScholar_Llama-3.1_OpenScholar-8B/1e6ea564-30ff-4db3-8bb6-070da34e3fb5.json deleted file mode 100644 index 26c120844f61cea454a15550b9188ae1a46d5966..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/OpenScholar_Llama-3.1_OpenScholar-8B/1e6ea564-30ff-4db3-8bb6-070da34e3fb5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/OpenScholar_Llama-3.1_OpenScholar-8B/1762652579.807913", - "retrieved_timestamp": "1762652579.807913", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "OpenScholar/Llama-3.1_OpenScholar-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "OpenScholar/Llama-3.1_OpenScholar-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6064010159709571 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5207740834450674 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16540785498489427 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4275104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.370844414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Orenguteng_Llama-3.1-8B-Lexi-Uncensored-V2/3b02898e-b47f-4d53-9bd4-575d47df29af.json 
b/leaderboard_data/HFOpenLLMv2/meta/Orenguteng_Llama-3.1-8B-Lexi-Uncensored-V2/3b02898e-b47f-4d53-9bd4-575d47df29af.json deleted file mode 100644 index 56e8706c004395cb432eaa7b0357f8564116b574..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Orenguteng_Llama-3.1-8B-Lexi-Uncensored-V2/3b02898e-b47f-4d53-9bd4-575d47df29af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Orenguteng_Llama-3.1-8B-Lexi-Uncensored-V2/1762652579.808416", - "retrieved_timestamp": "1762652579.808417", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2", - "developer": "meta", - "inference_platform": "unknown", - "id": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7791581891603169 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084008018783934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1971299093655589 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3780751329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Orenguteng_Llama-3.1-8B-Lexi-Uncensored/fe095b66-350c-4236-ab1b-e2e19af73486.json b/leaderboard_data/HFOpenLLMv2/meta/Orenguteng_Llama-3.1-8B-Lexi-Uncensored/fe095b66-350c-4236-ab1b-e2e19af73486.json deleted file mode 100644 index 8b863f38b609fa408a1d4b2aa4a7af9d69356d5b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Orenguteng_Llama-3.1-8B-Lexi-Uncensored/fe095b66-350c-4236-ab1b-e2e19af73486.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Orenguteng_Llama-3.1-8B-Lexi-Uncensored/1762652579.8081658", - "retrieved_timestamp": 
"1762652579.808167", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored", - "developer": "meta", - "inference_platform": "unknown", - "id": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7776843220432896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5057261652642643 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15709969788519637 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37898936170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/PJMixers-Dev_LLaMa-3.1-RomboTiesTest-8B/0130c0ac-a790-492d-aac2-55e999b724ef.json b/leaderboard_data/HFOpenLLMv2/meta/PJMixers-Dev_LLaMa-3.1-RomboTiesTest-8B/0130c0ac-a790-492d-aac2-55e999b724ef.json deleted file mode 100644 index f1fb1518f9bb2ac7c7a72b06d79824f5922bfaed..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/PJMixers-Dev_LLaMa-3.1-RomboTiesTest-8B/0130c0ac-a790-492d-aac2-55e999b724ef.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.1-RomboTiesTest-8B/1762652579.8100638", - "retrieved_timestamp": "1762652579.8100648", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7825303527972447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5073267838961463 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2001510574018127 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3869895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767453457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/PJMixers-Dev_LLaMa-3.1-RomboTiesTest2-8B/dbfe2c89-a7c8-4fe5-95a1-cf1a58b6f55c.json b/leaderboard_data/HFOpenLLMv2/meta/PJMixers-Dev_LLaMa-3.1-RomboTiesTest2-8B/dbfe2c89-a7c8-4fe5-95a1-cf1a58b6f55c.json deleted file mode 100644 index 2f3263f10c31f57a0bd41e442804acffdeb9aec3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/PJMixers-Dev_LLaMa-3.1-RomboTiesTest2-8B/dbfe2c89-a7c8-4fe5-95a1-cf1a58b6f55c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.1-RomboTiesTest2-8B/1762652579.810312", - "retrieved_timestamp": "1762652579.810313", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7825303527972447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5073267838961463 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2001510574018127 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3869895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767453457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.015 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/PJMixers_LLaMa-3-CursedStock-v2.0-8B/4f7c69a5-70e5-4f7b-9520-9fa9e642df57.json b/leaderboard_data/HFOpenLLMv2/meta/PJMixers_LLaMa-3-CursedStock-v2.0-8B/4f7c69a5-70e5-4f7b-9520-9fa9e642df57.json deleted file mode 100644 index 15b30bc1a1d928fc1d44f41be675fc12245f3483..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/PJMixers_LLaMa-3-CursedStock-v2.0-8B/4f7c69a5-70e5-4f7b-9520-9fa9e642df57.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PJMixers_LLaMa-3-CursedStock-v2.0-8B/1762652579.809348", - "retrieved_timestamp": "1762652579.809348", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PJMixers/LLaMa-3-CursedStock-v2.0-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "PJMixers/LLaMa-3-CursedStock-v2.0-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6330791189599152 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.527115950402997 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38562500000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3556349734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/RLHFlow_ArmoRM-Llama3-8B-v0.1/b8ce63dd-5c8a-4bba-b381-147efcdcc161.json b/leaderboard_data/HFOpenLLMv2/meta/RLHFlow_ArmoRM-Llama3-8B-v0.1/b8ce63dd-5c8a-4bba-b381-147efcdcc161.json deleted file mode 100644 index bbda12b27a54866c7847f5ee207cfebc453f1135..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/RLHFlow_ArmoRM-Llama3-8B-v0.1/b8ce63dd-5c8a-4bba-b381-147efcdcc161.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/RLHFlow_ArmoRM-Llama3-8B-v0.1/1762652579.8493571", - "retrieved_timestamp": "1762652579.8493571", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "RLHFlow/ArmoRM-Llama3-8B-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "RLHFlow/ArmoRM-Llama3-8B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18967007539993883 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2876467446788138 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3948020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10779587765957446 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForRewardModelWithGating", - "params_billions": 7.511 - } -} 
\ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Replete-AI_Replete-Coder-Llama3-8B/c8b29113-7815-4cf3-be36-76e3e87d6068.json b/leaderboard_data/HFOpenLLMv2/meta/Replete-AI_Replete-Coder-Llama3-8B/c8b29113-7815-4cf3-be36-76e3e87d6068.json deleted file mode 100644 index 955eff5aec50d044b5a537810b45da428e6a0187..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Replete-AI_Replete-Coder-Llama3-8B/c8b29113-7815-4cf3-be36-76e3e87d6068.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-Coder-Llama3-8B/1762652579.851821", - "retrieved_timestamp": "1762652579.851821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Replete-AI/Replete-Coder-Llama3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Replete-AI/Replete-Coder-Llama3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4729362535849324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271277102526684 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26090604026845643 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39530208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13306183510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Replete-AI_Replete-LLM-V2-Llama-3.1-8b/c3977d28-b18d-4e86-bc69-1aa08422585c.json b/leaderboard_data/HFOpenLLMv2/meta/Replete-AI_Replete-LLM-V2-Llama-3.1-8b/c3977d28-b18d-4e86-bc69-1aa08422585c.json deleted file mode 100644 index c901ad9acc9938c0d15644974cd968ba7cb321b2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Replete-AI_Replete-LLM-V2-Llama-3.1-8b/c3977d28-b18d-4e86-bc69-1aa08422585c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/Replete-AI_Replete-LLM-V2-Llama-3.1-8b/1762652579.8529909", - "retrieved_timestamp": "1762652579.852992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Replete-AI/Replete-LLM-V2-Llama-3.1-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Replete-AI/Replete-LLM-V2-Llama-3.1-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5514966954347797 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5339203611594218 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1404833836858006 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4000729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37533244680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/SaisExperiments_RightSheep-Llama3.2-3B/4ef7907b-270f-45dc-8f18-88c62c1c8bfe.json b/leaderboard_data/HFOpenLLMv2/meta/SaisExperiments_RightSheep-Llama3.2-3B/4ef7907b-270f-45dc-8f18-88c62c1c8bfe.json deleted file mode 100644 index 32c501d9e43b5dff5c4e6939201e2225eeda2502..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/SaisExperiments_RightSheep-Llama3.2-3B/4ef7907b-270f-45dc-8f18-88c62c1c8bfe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SaisExperiments_RightSheep-Llama3.2-3B/1762652579.8563251", - "retrieved_timestamp": "1762652579.8563259", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SaisExperiments/RightSheep-Llama3.2-3B", - "developer": "meta", - 
"inference_platform": "unknown", - "id": "SaisExperiments/RightSheep-Llama3.2-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4156338515139829 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42407794300783824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08081570996978851 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25398936170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Sakalti_Llama3.2-3B-Uranus-1/aba2e376-936d-4960-a82b-da09d2266826.json b/leaderboard_data/HFOpenLLMv2/meta/Sakalti_Llama3.2-3B-Uranus-1/aba2e376-936d-4960-a82b-da09d2266826.json deleted file mode 100644 index 57fc4ff516938fbf75b3575efdbd1140b2698e09..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Sakalti_Llama3.2-3B-Uranus-1/aba2e376-936d-4960-a82b-da09d2266826.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Llama3.2-3B-Uranus-1/1762652579.8570151", - "retrieved_timestamp": "1762652579.857016", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Llama3.2-3B-Uranus-1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Sakalti/Llama3.2-3B-Uranus-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5335365718515761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44368258173181263 
- } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14954682779456194 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3668645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3094248670212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/SentientAGI_Dobby-Mini-Leashed-Llama-3.1-8B/ed1798c0-348f-4294-b546-8a7892225d33.json b/leaderboard_data/HFOpenLLMv2/meta/SentientAGI_Dobby-Mini-Leashed-Llama-3.1-8B/ed1798c0-348f-4294-b546-8a7892225d33.json deleted file mode 100644 index 82bbf0d86bd628e5cd05f73e9b86dcff6bd3182c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/SentientAGI_Dobby-Mini-Leashed-Llama-3.1-8B/ed1798c0-348f-4294-b546-8a7892225d33.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SentientAGI_Dobby-Mini-Leashed-Llama-3.1-8B/1762652579.878995", - "retrieved_timestamp": "1762652579.878996", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7847034756667863 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5138053850165866 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 
- } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36943151595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/SentientAGI_Dobby-Mini-Unhinged-Llama-3.1-8B/6ac51916-9278-46b6-9b0f-059745f3d845.json b/leaderboard_data/HFOpenLLMv2/meta/SentientAGI_Dobby-Mini-Unhinged-Llama-3.1-8B/6ac51916-9278-46b6-9b0f-059745f3d845.json deleted file mode 100644 index b7e428e1d9492aff36ec0322d6f940e4ba9d61b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/SentientAGI_Dobby-Mini-Unhinged-Llama-3.1-8B/6ac51916-9278-46b6-9b0f-059745f3d845.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SentientAGI_Dobby-Mini-Unhinged-Llama-3.1-8B/1762652579.879248", - "retrieved_timestamp": "1762652579.879248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7456858912130924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5142440064892148 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40128125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35846077127659576 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Sicarius-Prototyping_Brainy_LLAMA/83fd7abf-00b0-4242-b8c3-87ef9c40dfcf.json b/leaderboard_data/HFOpenLLMv2/meta/Sicarius-Prototyping_Brainy_LLAMA/83fd7abf-00b0-4242-b8c3-87ef9c40dfcf.json deleted file mode 100644 index 3f4319f54602de08756411b4a38fe68c6b1b407e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Sicarius-Prototyping_Brainy_LLAMA/83fd7abf-00b0-4242-b8c3-87ef9c40dfcf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sicarius-Prototyping_Brainy_LLAMA/1762652579.880492", - "retrieved_timestamp": "1762652579.8804932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sicarius-Prototyping/Brainy_LLAMA", - "developer": "meta", - "inference_platform": "unknown", - "id": "Sicarius-Prototyping/Brainy_LLAMA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5204224790223274 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5117131754488634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4143333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3848902925531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/SicariusSicariiStuff_Impish_LLAMA_3B/9235cd92-5335-498e-881f-21938da4ed61.json b/leaderboard_data/HFOpenLLMv2/meta/SicariusSicariiStuff_Impish_LLAMA_3B/9235cd92-5335-498e-881f-21938da4ed61.json deleted file mode 100644 index e0829b4922cda928408162edd083c026f6dc8229..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/SicariusSicariiStuff_Impish_LLAMA_3B/9235cd92-5335-498e-881f-21938da4ed61.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_LLAMA_3B/1762652579.882116", - "retrieved_timestamp": "1762652579.882117", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/Impish_LLAMA_3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Impish_LLAMA_3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46299485365496884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40905101627873225 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3672708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2941323138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/SicariusSicariiStuff_LLAMA-3_8B_Unaligned_BETA/27e6623c-49b2-4763-ac6f-b35f1f9002a8.json b/leaderboard_data/HFOpenLLMv2/meta/SicariusSicariiStuff_LLAMA-3_8B_Unaligned_BETA/27e6623c-49b2-4763-ac6f-b35f1f9002a8.json deleted file mode 100644 index 804d05879525cb555813ce24b24f687af24a4b60..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/SicariusSicariiStuff_LLAMA-3_8B_Unaligned_BETA/27e6623c-49b2-4763-ac6f-b35f1f9002a8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_LLAMA-3_8B_Unaligned_BETA/1762652579.883067", - "retrieved_timestamp": "1762652579.883067", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" 
- }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA", - "developer": "meta", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713203189758729 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4717234028484832 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41194791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464926861702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.1-8B-lora-epoch1/da7be2d8-96ff-4902-9628-c1781391c68e.json b/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.1-8B-lora-epoch1/da7be2d8-96ff-4902-9628-c1781391c68e.json deleted file mode 100644 index 11b66595e446826b894d7e9a051ddc800d1807b3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.1-8B-lora-epoch1/da7be2d8-96ff-4902-9628-c1781391c68e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.1-8B-lora-epoch1/1762652579.8857", - "retrieved_timestamp": "1762652579.8857012", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5058345190760515 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088388495224864 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3997916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3777426861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.1-8B-lora/fffe8411-9f9c-48ce-adb5-8d483022bffe.json b/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.1-8B-lora/fffe8411-9f9c-48ce-adb5-8d483022bffe.json deleted file mode 100644 index 69795ab1e037231103c9e8f6cb0447c49fc3f61e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.1-8B-lora/fffe8411-9f9c-48ce-adb5-8d483022bffe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.1-8B-lora/1762652579.88546", - "retrieved_timestamp": "1762652579.885461", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5058345190760515 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088388495224864 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15483383685800603 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3997916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3777426861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-epoch3/d0e4c608-0c64-4cf4-aee6-714475d500db.json b/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-epoch3/d0e4c608-0c64-4cf4-aee6-714475d500db.json deleted file mode 100644 index 9ad8e513debf318745ff6b4aca73cab323320ccc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-epoch3/d0e4c608-0c64-4cf4-aee6-714475d500db.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-epoch3/1762652579.8859022", - "retrieved_timestamp": "1762652579.8859022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3247084402718121 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3166586087861201 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33815625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": 
{ - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12790890957446807 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-epoch5/19c08486-99c5-4f53-a6cc-69cb58e0808a.json b/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-epoch5/19c08486-99c5-4f53-a6cc-69cb58e0808a.json deleted file mode 100644 index 2bdafc7631e58cc3ca3bc8bf252502b592ebf891..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-epoch5/19c08486-99c5-4f53-a6cc-69cb58e0808a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-epoch5/1762652579.8861618", - "retrieved_timestamp": "1762652579.886163", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4359920566319587 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060156188911545 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3471458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19456449468085107 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-v2-epoch3/f45610c5-ead3-4670-9639-aa30fb145829.json 
b/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-v2-epoch3/f45610c5-ead3-4670-9639-aa30fb145829.json deleted file mode 100644 index c11bd39fd7f04028136eb00709486ec32b73d9c0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-v2-epoch3/f45610c5-ead3-4670-9639-aa30fb145829.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-v2-epoch3/1762652579.886383", - "retrieved_timestamp": "1762652579.886384", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4359920566319587 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060156188911545 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3471458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19456449468085107 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-v2-epoch5/34a1eda3-2a02-4522-955a-7ed3f1ee97d6.json b/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-v2-epoch5/34a1eda3-2a02-4522-955a-7ed3f1ee97d6.json deleted file mode 100644 index 5fbe8a6c59afbf44a731a7116b8e578271bcc839..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-v2-epoch5/34a1eda3-2a02-4522-955a-7ed3f1ee97d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-1B-lora-v2-epoch5/1762652579.8865862", - 
"retrieved_timestamp": "1762652579.886587", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42467652495378927 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33968360414253995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34584375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19456449468085107 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch1/08fdfb9e-7998-4483-bb1a-4ea7f0e2980e.json b/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch1/08fdfb9e-7998-4483-bb1a-4ea7f0e2980e.json deleted file mode 100644 index 61c97e8328b6b7f64c4edbfd3c88517bb7256689..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch1/08fdfb9e-7998-4483-bb1a-4ea7f0e2980e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch1/1762652579.886793", - "retrieved_timestamp": "1762652579.886794", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1", - "developer": "meta", - "inference_platform": "unknown", - "id": 
"SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5331121424487028 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4399628268031015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35222916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30044880319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch2/37a5a439-e2ac-46ec-af94-b60f127157de.json b/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch2/37a5a439-e2ac-46ec-af94-b60f127157de.json deleted file mode 100644 index 180571bde56ec9010d73b357437e9d76c51a584d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch2/37a5a439-e2ac-46ec-af94-b60f127157de.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch2/1762652579.887009", - "retrieved_timestamp": "1762652579.88701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5331121424487028 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4399628268031015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35222916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30044880319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch3/6d191a68-8817-468a-850b-01f5ba76e05f.json b/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch3/6d191a68-8817-468a-850b-01f5ba76e05f.json deleted file mode 100644 index 643f57ba68ff8ca9164db39b58c3b2aac70c9ace..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch3/6d191a68-8817-468a-850b-01f5ba76e05f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Llama3.2-3B-lora-epoch3/1762652579.887351", - "retrieved_timestamp": "1762652579.8873532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3", - "developer": "meta", - "inference_platform": "unknown", - "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5331121424487028 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4399628268031015 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35222916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30044880319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Skywork_Skywork-o1-Open-Llama-3.1-8B/e98879cc-d7fd-4e97-ab86-0ca28265abeb.json b/leaderboard_data/HFOpenLLMv2/meta/Skywork_Skywork-o1-Open-Llama-3.1-8B/e98879cc-d7fd-4e97-ab86-0ca28265abeb.json deleted file mode 100644 index 9d6f5a34d615e89b560ef3136b35163bdfae94bc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Skywork_Skywork-o1-Open-Llama-3.1-8B/e98879cc-d7fd-4e97-ab86-0ca28265abeb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Skywork_Skywork-o1-Open-Llama-3.1-8B/1762652579.8887959", - "retrieved_timestamp": "1762652579.888797", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Skywork/Skywork-o1-Open-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Skywork/Skywork-o1-Open-Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3518364605912313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45159089701897237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31564583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20304188829787234 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Solshine_Llama-3-1-big-thoughtful-passthrough-merge-2/b36e0fba-9fa1-4e74-9d26-b4889343f113.json b/leaderboard_data/HFOpenLLMv2/meta/Solshine_Llama-3-1-big-thoughtful-passthrough-merge-2/b36e0fba-9fa1-4e74-9d26-b4889343f113.json deleted file mode 100644 index bfbe54d7bac150dbf80435cfb55b9c480296266b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Solshine_Llama-3-1-big-thoughtful-passthrough-merge-2/b36e0fba-9fa1-4e74-9d26-b4889343f113.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Solshine_Llama-3-1-big-thoughtful-passthrough-merge-2/1762652579.889379", - "retrieved_timestamp": "1762652579.88938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2", - "developer": "meta", - "inference_platform": "unknown", - "id": "Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25466650709007654 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32093808427144627 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38894791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11851728723404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 18.5 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/T145_Llama-3.1-8B-Zeus/e0889500-8f6e-496c-b275-ac110458c56d.json b/leaderboard_data/HFOpenLLMv2/meta/T145_Llama-3.1-8B-Zeus/e0889500-8f6e-496c-b275-ac110458c56d.json deleted file mode 100644 index 7d8b38ab19026bafcfd67e12b51520456ea89fa8..0000000000000000000000000000000000000000 
--- a/leaderboard_data/HFOpenLLMv2/meta/T145_Llama-3.1-8B-Zeus/e0889500-8f6e-496c-b275-ac110458c56d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/T145_Llama-3.1-8B-Zeus/1762652579.900112", - "retrieved_timestamp": "1762652579.9001129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "T145/Llama-3.1-8B-Zeus", - "developer": "meta", - "inference_platform": "unknown", - "id": "T145/Llama-3.1-8B-Zeus" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35176110497923285 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3671175348446849 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33158333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1332280585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Tarek07_Progenitor-V1.1-LLaMa-70B/8638b115-f092-42f1-949d-162321fe5833.json b/leaderboard_data/HFOpenLLMv2/meta/Tarek07_Progenitor-V1.1-LLaMa-70B/8638b115-f092-42f1-949d-162321fe5833.json deleted file mode 100644 index 9d02dc74aba007d9210150758964abf72a8a3d56..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Tarek07_Progenitor-V1.1-LLaMa-70B/8638b115-f092-42f1-949d-162321fe5833.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Tarek07_Progenitor-V1.1-LLaMa-70B/1762652579.911703", - "retrieved_timestamp": "1762652579.911703", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "Tarek07/Progenitor-V1.1-LLaMa-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Tarek07/Progenitor-V1.1-LLaMa-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6906064796960952 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6971116049173388 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45805369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47356250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5465425531914894 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Tarek07_Thalassic-Alpha-LLaMa-70B/a20052ae-dfa0-4df7-a9a6-f182dbef513d.json b/leaderboard_data/HFOpenLLMv2/meta/Tarek07_Thalassic-Alpha-LLaMa-70B/a20052ae-dfa0-4df7-a9a6-f182dbef513d.json deleted file mode 100644 index dff9e9746b2ec6320eb7cd5418f9e7b7e171e840..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Tarek07_Thalassic-Alpha-LLaMa-70B/a20052ae-dfa0-4df7-a9a6-f182dbef513d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Tarek07_Thalassic-Alpha-LLaMa-70B/1762652579.9119601", - "retrieved_timestamp": "1762652579.911961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Tarek07/Thalassic-Alpha-LLaMa-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Tarek07/Thalassic-Alpha-LLaMa-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7003484088884161 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6940408286616311 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3149546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4437919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4801979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.543467420212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/TencentARC_LLaMA-Pro-8B/8d2c510b-a092-4e5d-b468-6e58501cad8a.json b/leaderboard_data/HFOpenLLMv2/meta/TencentARC_LLaMA-Pro-8B/8d2c510b-a092-4e5d-b468-6e58501cad8a.json deleted file mode 100644 index c5d0c8a85c4b200f25915c20c0c9cc4bbab2a6b4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/TencentARC_LLaMA-Pro-8B/8d2c510b-a092-4e5d-b468-6e58501cad8a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TencentARC_LLaMA-Pro-8B/1762652579.912878", - "retrieved_timestamp": "1762652579.912879", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TencentARC/LLaMA-Pro-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "TencentARC/LLaMA-Pro-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2277135777514772 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3484197711435169 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40181249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18110039893617022 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.357 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/TheDrummer_Llama-3SOME-8B-v2/8f4349ad-76e7-4ce5-9121-fef2e376b4bc.json b/leaderboard_data/HFOpenLLMv2/meta/TheDrummer_Llama-3SOME-8B-v2/8f4349ad-76e7-4ce5-9121-fef2e376b4bc.json deleted file mode 100644 index 3d450c2256c4fbc2ff2ae9148567250ac802c9f4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/TheDrummer_Llama-3SOME-8B-v2/8f4349ad-76e7-4ce5-9121-fef2e376b4bc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TheDrummer_Llama-3SOME-8B-v2/1762652579.914594", - "retrieved_timestamp": "1762652579.9145951", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TheDrummer/Llama-3SOME-8B-v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "TheDrummer/Llama-3SOME-8B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4508049752434651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5203347869042534 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3832708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37533244680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/TinyLlama_TinyLlama-1.1B-intermediate-step-1431k-3T/99c5044d-1308-4f30-9413-bc2672545f76.json b/leaderboard_data/HFOpenLLMv2/meta/TinyLlama_TinyLlama-1.1B-intermediate-step-1431k-3T/99c5044d-1308-4f30-9413-bc2672545f76.json deleted file mode 100644 index 1a62dbaa1da9d364bcaa95f86a9368dbb6215192..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/TinyLlama_TinyLlama-1.1B-intermediate-step-1431k-3T/99c5044d-1308-4f30-9413-bc2672545f76.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-intermediate-step-1431k-3T/1762652579.9195771", - "retrieved_timestamp": "1762652579.919578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", - "developer": "meta", - "inference_platform": "unknown", - "id": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22766371006706648 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3071188438267271 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33803125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11203457446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/TinyLlama_TinyLlama_v1.1/e81db661-b05a-4d95-8be4-d663317d3d13.json b/leaderboard_data/HFOpenLLMv2/meta/TinyLlama_TinyLlama_v1.1/e81db661-b05a-4d95-8be4-d663317d3d13.json deleted file mode 100644 index 3e5c1c9b5d7ed9acd1b1b6f6155ae45a1b60c17e..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/TinyLlama_TinyLlama_v1.1/e81db661-b05a-4d95-8be4-d663317d3d13.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama_v1.1/1762652579.919856", - "retrieved_timestamp": "1762652579.9198568", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TinyLlama/TinyLlama_v1.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "TinyLlama/TinyLlama_v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20006139266036338 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30237018045076064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36996874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10488696808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_DS-Distilled-Hermes-Llama-3.1/d8a0873b-58e8-449a-aedd-7117e9931546.json b/leaderboard_data/HFOpenLLMv2/meta/Triangle104_DS-Distilled-Hermes-Llama-3.1/d8a0873b-58e8-449a-aedd-7117e9931546.json deleted file mode 100644 index e1e284a20f1253b822be36641cad00fd543d4be0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_DS-Distilled-Hermes-Llama-3.1/d8a0873b-58e8-449a-aedd-7117e9931546.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_DS-Distilled-Hermes-Llama-3.1/1762652579.9221509", - "retrieved_timestamp": "1762652579.922152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/DS-Distilled-Hermes-Llama-3.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/DS-Distilled-Hermes-Llama-3.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3229353670483207 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5117012556460311 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2930513595166163 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4038541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31100398936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_DS-Distilled-Hermes-Llama-3.1_TIES/9383604e-dd29-4c51-87eb-68f19ff929ec.json b/leaderboard_data/HFOpenLLMv2/meta/Triangle104_DS-Distilled-Hermes-Llama-3.1_TIES/9383604e-dd29-4c51-87eb-68f19ff929ec.json deleted file mode 100644 index 631abc52150f983b4dce4098e6df8ca602a612d8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_DS-Distilled-Hermes-Llama-3.1_TIES/9383604e-dd29-4c51-87eb-68f19ff929ec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_DS-Distilled-Hermes-Llama-3.1_TIES/1762652579.922394", - "retrieved_timestamp": "1762652579.922395", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.13641360479084386 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.292845246551473 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36209375000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11037234042553191 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_DS-R1-Llama-8B-Harmony/ef25dd23-7cc0-46ad-898d-31bfb5205aad.json b/leaderboard_data/HFOpenLLMv2/meta/Triangle104_DS-R1-Llama-8B-Harmony/ef25dd23-7cc0-46ad-898d-31bfb5205aad.json deleted file mode 100644 index 254c72676e2332070d0c7fe869244b3601476f5d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_DS-R1-Llama-8B-Harmony/ef25dd23-7cc0-46ad-898d-31bfb5205aad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Llama-8B-Harmony/1762652579.9232068", - "retrieved_timestamp": "1762652579.9232068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/DS-R1-Llama-8B-Harmony", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/DS-R1-Llama-8B-Harmony" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35663262366077564 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41536451555729687 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4282477341389728 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3761979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27435172872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_DSR1-Distill-Llama-Lit-8B/b31d5098-4324-4307-aa50-2413ceba5481.json b/leaderboard_data/HFOpenLLMv2/meta/Triangle104_DSR1-Distill-Llama-Lit-8B/b31d5098-4324-4307-aa50-2413ceba5481.json deleted file mode 100644 index b327070605d8bbadaae50368b66a1e08994f2bec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_DSR1-Distill-Llama-Lit-8B/b31d5098-4324-4307-aa50-2413ceba5481.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_DSR1-Distill-Llama-Lit-8B/1762652579.923411", - "retrieved_timestamp": "1762652579.923412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/DSR1-Distill-Llama-Lit-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/DSR1-Distill-Llama-Lit-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18852090231696345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4284056327107781 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35196374622356497 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35346875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27975398936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Dolphin3-Llama3.2-Smart/88532e60-eff6-404b-8e74-fd8836a99ff9.json b/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Dolphin3-Llama3.2-Smart/88532e60-eff6-404b-8e74-fd8836a99ff9.json deleted file mode 100644 index 05d7a8a0a4d8bdba8a7540f4ac78eea41eb592b1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Dolphin3-Llama3.2-Smart/88532e60-eff6-404b-8e74-fd8836a99ff9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Dolphin3-Llama3.2-Smart/1762652579.924712", - "retrieved_timestamp": "1762652579.924713", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Dolphin3-Llama3.2-Smart", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/Dolphin3-Llama3.2-Smart" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.413660199382084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.397507554563096 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21949800531914893 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Hermes-Llama-3.2-CoT-Summary/9bd6ca33-d62a-4327-a11e-f36188f0256a.json b/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Hermes-Llama-3.2-CoT-Summary/9bd6ca33-d62a-4327-a11e-f36188f0256a.json 
deleted file mode 100644 index 6d019ea07d6b61faaa92ce3f7dd7711a94651a5c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Hermes-Llama-3.2-CoT-Summary/9bd6ca33-d62a-4327-a11e-f36188f0256a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Hermes-Llama-3.2-CoT-Summary/1762652579.925437", - "retrieved_timestamp": "1762652579.925438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Hermes-Llama-3.2-CoT-Summary", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/Hermes-Llama-3.2-CoT-Summary" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48302836473889277 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42003008354054533 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29014295212765956 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Hermes-Llama-3.2-CoT/ddacf85a-a333-4cf9-b0f2-b9a5d5831b8c.json b/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Hermes-Llama-3.2-CoT/ddacf85a-a333-4cf9-b0f2-b9a5d5831b8c.json deleted file mode 100644 index 4dfeb4ab6d81f66382ae8f8d55ea6c2af459f621..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Hermes-Llama-3.2-CoT/ddacf85a-a333-4cf9-b0f2-b9a5d5831b8c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Hermes-Llama-3.2-CoT/1762652579.925184", - "retrieved_timestamp": "1762652579.925184", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Hermes-Llama-3.2-CoT", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/Hermes-Llama-3.2-CoT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4177571066991139 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4615751505493966 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36978125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2947140957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Llama3.1-Allades-Lit-8b/d3d2f0cc-2775-4a01-b8ae-5206cafcb2bb.json b/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Llama3.1-Allades-Lit-8b/d3d2f0cc-2775-4a01-b8ae-5206cafcb2bb.json deleted file mode 100644 index 570d0d8967a2e03c9f1b805e97862c9e1d7912b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Llama3.1-Allades-Lit-8b/d3d2f0cc-2775-4a01-b8ae-5206cafcb2bb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Llama3.1-Allades-Lit-8b/1762652579.927552", - "retrieved_timestamp": "1762652579.927553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Llama3.1-Allades-Lit-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/Llama3.1-Allades-Lit-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.24612361866514182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41832977787362163 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37083333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2724401595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Llama3.1-cc-Lit-8b/3ccecc91-6528-4592-8ca3-722a62bfa102.json b/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Llama3.1-cc-Lit-8b/3ccecc91-6528-4592-8ca3-722a62bfa102.json deleted file mode 100644 index 86232d6b3d243bfeeba49b4d063ab8e4cbb043b3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Llama3.1-cc-Lit-8b/3ccecc91-6528-4592-8ca3-722a62bfa102.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Llama3.1-cc-Lit-8b/1762652579.927792", - "retrieved_timestamp": "1762652579.9277928", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Llama3.1-cc-Lit-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/Llama3.1-cc-Lit-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2993047336622384 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3847994561866892 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - 
} - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30044880319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Porpoise-R1-Llama3.2-3b/29843ea0-0ab4-44e1-8206-10a1135cce8a.json b/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Porpoise-R1-Llama3.2-3b/29843ea0-0ab4-44e1-8206-10a1135cce8a.json deleted file mode 100644 index 6069bd8a3b13f9aa81c86a653ea7fe3227f5fdac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_Porpoise-R1-Llama3.2-3b/29843ea0-0ab4-44e1-8206-10a1135cce8a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Porpoise-R1-Llama3.2-3b/1762652579.931781", - "retrieved_timestamp": "1762652579.931781", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Porpoise-R1-Llama3.2-3b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/Porpoise-R1-Llama3.2-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4352174452674459 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38236758004585686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.357625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21168550531914893 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_RomboHermes3-R1-Llama3.2-3b/8ce06258-4909-4e46-a326-85052d28c5ff.json b/leaderboard_data/HFOpenLLMv2/meta/Triangle104_RomboHermes3-R1-Llama3.2-3b/8ce06258-4909-4e46-a326-85052d28c5ff.json deleted file mode 100644 index 3c87d20ca4a566ad424eb93b6ab7f22f3d6f6e39..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/Triangle104_RomboHermes3-R1-Llama3.2-3b/8ce06258-4909-4e46-a326-85052d28c5ff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_RomboHermes3-R1-Llama3.2-3b/1762652579.9345112", - "retrieved_timestamp": "1762652579.9345121", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/RomboHermes3-R1-Llama3.2-3b", - "developer": "meta", - "inference_platform": "unknown", - "id": "Triangle104/RomboHermes3-R1-Llama3.2-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.300728733094855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42639466274987187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36565625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2957114361702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/UKzExecution_LlamaExecutor-8B-3.0.5/0f2ddff5-6077-4166-8fe4-ade89d3a6003.json 
b/leaderboard_data/HFOpenLLMv2/meta/UKzExecution_LlamaExecutor-8B-3.0.5/0f2ddff5-6077-4166-8fe4-ade89d3a6003.json deleted file mode 100644 index 64645ec98ec64ba9baeba7a465e9e412594a4d44..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/UKzExecution_LlamaExecutor-8B-3.0.5/0f2ddff5-6077-4166-8fe4-ade89d3a6003.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/UKzExecution_LlamaExecutor-8B-3.0.5/1762652579.938387", - "retrieved_timestamp": "1762652579.938387", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "UKzExecution/LlamaExecutor-8B-3.0.5", - "developer": "meta", - "inference_platform": "unknown", - "id": "UKzExecution/LlamaExecutor-8B-3.0.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.740290207759855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5006000507021341 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3625332446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/VIRNECT_llama-3-Korean-8B-r-v-0.1/c3448f16-33c4-42c8-bde3-b503786cba7f.json b/leaderboard_data/HFOpenLLMv2/meta/VIRNECT_llama-3-Korean-8B-r-v-0.1/c3448f16-33c4-42c8-bde3-b503786cba7f.json deleted file mode 100644 index 4d2ca2fca840f71113290fd7b45daeaa69aa1a39..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/VIRNECT_llama-3-Korean-8B-r-v-0.1/c3448f16-33c4-42c8-bde3-b503786cba7f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VIRNECT_llama-3-Korean-8B-r-v-0.1/1762652579.944067", - "retrieved_timestamp": "1762652579.9440682", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VIRNECT/llama-3-Korean-8B-r-v-0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "VIRNECT/llama-3-Korean-8B-r-v-0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49157125316382755 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48061568139086264 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36748958333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3259640957446808 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 16.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/VIRNECT_llama-3-Korean-8B/1193d16a-5ba8-4a6c-b13d-116bb7731a71.json b/leaderboard_data/HFOpenLLMv2/meta/VIRNECT_llama-3-Korean-8B/1193d16a-5ba8-4a6c-b13d-116bb7731a71.json deleted file mode 100644 index 876e4d2d1657c6bb1d57f821eea86563fe0bc226..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/VIRNECT_llama-3-Korean-8B/1193d16a-5ba8-4a6c-b13d-116bb7731a71.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VIRNECT_llama-3-Korean-8B/1762652579.943881", - "retrieved_timestamp": "1762652579.943882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VIRNECT/llama-3-Korean-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "VIRNECT/llama-3-Korean-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5021376614050719 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.491837579362695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3647916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3536402925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/VIRNECT_llama-3-Korean-8B/c5ef57d2-a521-4b09-9aa1-0c06c9888cda.json b/leaderboard_data/HFOpenLLMv2/meta/VIRNECT_llama-3-Korean-8B/c5ef57d2-a521-4b09-9aa1-0c06c9888cda.json deleted file mode 100644 index c05d3ebaf50c605a95fafd20b73863cc8022bd47..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/VIRNECT_llama-3-Korean-8B/c5ef57d2-a521-4b09-9aa1-0c06c9888cda.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VIRNECT_llama-3-Korean-8B/1762652579.943627", - "retrieved_timestamp": "1762652579.943627", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VIRNECT/llama-3-Korean-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "VIRNECT/llama-3-Korean-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5058345190760515 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49082453083378397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.09290030211480363
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2709731543624161
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.36615624999999996
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3538896276595745
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "float16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3-70B-Fireplace/60150622-5b73-4b2c-a8f2-7c2e84cd3d0e.json b/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3-70B-Fireplace/60150622-5b73-4b2c-a8f2-7c2e84cd3d0e.json
deleted file mode 100644
index 99634b6453070effbc721cc446e7d73858f14750..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3-70B-Fireplace/60150622-5b73-4b2c-a8f2-7c2e84cd3d0e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3-70B-Fireplace/1762652579.944278",
-    "retrieved_timestamp": "1762652579.944279",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ValiantLabs/Llama3-70B-Fireplace",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ValiantLabs/Llama3-70B-Fireplace"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.7773596280092377
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.648899361888402
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.21450151057401812
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3548657718120805
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4448541666666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4892785904255319
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "float16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 70.554
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3-70B-ShiningValiant2/1650ab9b-4e64-48f1-9551-fb58758cb2f6.json b/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3-70B-ShiningValiant2/1650ab9b-4e64-48f1-9551-fb58758cb2f6.json
deleted file mode 100644
index 2b0ac5f8850be63d5e787b6ee302b2a45180e4e0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3-70B-ShiningValiant2/1650ab9b-4e64-48f1-9551-fb58758cb2f6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3-70B-ShiningValiant2/1762652579.9445372",
-    "retrieved_timestamp": "1762652579.944538",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ValiantLabs/Llama3-70B-ShiningValiant2",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ValiantLabs/Llama3-70B-ShiningValiant2"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.6121712611426571
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.6338341405069171
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.20770392749244712
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.33053691275167785
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4325729166666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.48977726063829785
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 70.554
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-70B-ShiningValiant2/6f4c4594-6f73-44e3-b531-f7651b523e8f.json b/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-70B-ShiningValiant2/6f4c4594-6f73-44e3-b531-f7651b523e8f.json
deleted file mode 100644
index f262f4daea3152e8cb3e7de059d84d2edfa8ff74..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-70B-ShiningValiant2/6f4c4594-6f73-44e3-b531-f7651b523e8f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-70B-ShiningValiant2/1762652579.94475",
-    "retrieved_timestamp": "1762652579.944751",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ValiantLabs/Llama3.1-70B-ShiningValiant2",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ValiantLabs/Llama3.1-70B-ShiningValiant2"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5355346037402979
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.6738408402945882
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.29154078549848944
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3926174496644295
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4681041666666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5172872340425532
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "float16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 70.554
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Cobalt/382ce872-f5a6-4753-9cca-ba06ddcbb4b6.json b/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Cobalt/382ce872-f5a6-4753-9cca-ba06ddcbb4b6.json
deleted file mode 100644
index 0c9b1d1827fe460ad57135be281c5be9d51a1413..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Cobalt/382ce872-f5a6-4753-9cca-ba06ddcbb4b6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Cobalt/1762652579.945206",
-    "retrieved_timestamp": "1762652579.945206",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ValiantLabs/Llama3.1-8B-Cobalt",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ValiantLabs/Llama3.1-8B-Cobalt"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.7168346653545925
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4910700749859321
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.15332326283987915
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2860738255033557
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3512395833333333
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.36627327127659576
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Cobalt/8683a084-2521-469c-8575-9b2595c112dd.json b/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Cobalt/8683a084-2521-469c-8575-9b2595c112dd.json
deleted file mode 100644
index 246746d99d717d11e410e75b5a6e7d50e1a6b057..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Cobalt/8683a084-2521-469c-8575-9b2595c112dd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Cobalt/1762652579.9449751",
-    "retrieved_timestamp": "1762652579.9449759",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ValiantLabs/Llama3.1-8B-Cobalt",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ValiantLabs/Llama3.1-8B-Cobalt"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3496134700372789
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4946769968149292
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.1268882175226586
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3036912751677852
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3959479166666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3644448138297872
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "float16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Enigma/e1c4e454-79c8-448d-ab33-629900a35779.json b/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Enigma/e1c4e454-79c8-448d-ab33-629900a35779.json
deleted file mode 100644
index 0624195d5dea94a6469817664dde173c4058fac1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Enigma/e1c4e454-79c8-448d-ab33-629900a35779.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Enigma/1762652579.945396",
-    "retrieved_timestamp": "1762652579.945397",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ValiantLabs/Llama3.1-8B-Enigma",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ValiantLabs/Llama3.1-8B-Enigma"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.26805542626896633
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.44776000880153927
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.0891238670694864
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.287751677852349
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4196041666666666
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.34092420212765956
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "float16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Esper2/aa8f6d7a-bf7a-4e00-932f-b31c9cf0705e.json b/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Esper2/aa8f6d7a-bf7a-4e00-932f-b31c9cf0705e.json
deleted file mode 100644
index 2d711f22d2c794b2aea4666f08b2fd283cc24a43..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Esper2/aa8f6d7a-bf7a-4e00-932f-b31c9cf0705e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Esper2/1762652579.945612",
-    "retrieved_timestamp": "1762652579.9456131",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ValiantLabs/Llama3.1-8B-Esper2",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ValiantLabs/Llama3.1-8B-Esper2"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2567398945907968
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4469866863000255
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.05891238670694864
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2726510067114094
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3560729166666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.29039228723404253
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "float16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Fireplace2/08843042-f5ed-4dbb-befe-82c48e370664.json b/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Fireplace2/08843042-f5ed-4dbb-befe-82c48e370664.json
deleted file mode 100644
index 46e30675836bcca20f5e175adb896ea2a2d0fec4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Fireplace2/08843042-f5ed-4dbb-befe-82c48e370664.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Fireplace2/1762652579.945827",
-    "retrieved_timestamp": "1762652579.945827",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ValiantLabs/Llama3.1-8B-Fireplace2",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ValiantLabs/Llama3.1-8B-Fireplace2"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5483240025354947
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4609817052543379
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.0581570996978852
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.28859060402684567
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.34330208333333334
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.24069148936170212
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "float16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Fireplace2/8c25e90b-944b-4c23-a7ed-43c9609c6bf7.json b/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Fireplace2/8c25e90b-944b-4c23-a7ed-43c9609c6bf7.json
deleted file mode 100644
index 5afb1ab3c5d60ec3dc5e4aec63757fcd4ecc1653..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-Fireplace2/8c25e90b-944b-4c23-a7ed-43c9609c6bf7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-Fireplace2/1762652579.946038",
-    "retrieved_timestamp": "1762652579.946039",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ValiantLabs/Llama3.1-8B-Fireplace2",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ValiantLabs/Llama3.1-8B-Fireplace2"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5328118281714739
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4613311485871581
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.08761329305135952
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.28942953020134227
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.33666666666666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.24235372340425532
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-ShiningValiant2/4b3c0c63-4718-4fce-bd70-a31b3b60dfad.json b/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-ShiningValiant2/4b3c0c63-4718-4fce-bd70-a31b3b60dfad.json
deleted file mode 100644
index ddc992cbe8b62fd62896085d023c132d43c3f9d7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-ShiningValiant2/4b3c0c63-4718-4fce-bd70-a31b3b60dfad.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-ShiningValiant2/1762652579.946223",
-    "retrieved_timestamp": "1762652579.9462242",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ValiantLabs/Llama3.1-8B-ShiningValiant2",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ValiantLabs/Llama3.1-8B-ShiningValiant2"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.6495653754260917
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.477390600131639
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.05664652567975831
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3104026845637584
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.39086458333333335
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.33818151595744683
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-ShiningValiant2/e1d82962-59c9-44e7-9243-ea62f6639d1e.json b/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-ShiningValiant2/e1d82962-59c9-44e7-9243-ea62f6639d1e.json
deleted file mode 100644
index e6078bf3be7a7e76f4a1698018017616d4d26264..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.1-8B-ShiningValiant2/e1d82962-59c9-44e7-9243-ea62f6639d1e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.1-8B-ShiningValiant2/1762652579.946434",
-    "retrieved_timestamp": "1762652579.946435",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ValiantLabs/Llama3.1-8B-ShiningValiant2",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ValiantLabs/Llama3.1-8B-ShiningValiant2"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.26780608784691284
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4429290017852748
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.05211480362537765
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.30201342281879195
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.39591666666666664
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.292719414893617
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "float16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.2-3B-Enigma/71e3ab93-9667-4e99-b0a1-e25b701b13fd.json b/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.2-3B-Enigma/71e3ab93-9667-4e99-b0a1-e25b701b13fd.json
deleted file mode 100644
index 1b20e710015386c55bc68c7504d8aec147479581..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.2-3B-Enigma/71e3ab93-9667-4e99-b0a1-e25b701b13fd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.2-3B-Enigma/1762652579.94662",
-    "retrieved_timestamp": "1762652579.946621",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ValiantLabs/Llama3.2-3B-Enigma",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ValiantLabs/Llama3.2-3B-Enigma"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2786218345102107
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3722590772046992
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.04380664652567976
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.26174496644295303
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3921354166666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2427692819148936
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "float16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 3.213
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.2-3B-Esper2/5567fc86-d3f8-4ef7-94d8-12fc28eeb9b4.json b/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.2-3B-Esper2/5567fc86-d3f8-4ef7-94d8-12fc28eeb9b4.json
deleted file mode 100644
index e0e4b0b6ab9c77d52976302398618538fdef01d7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.2-3B-Esper2/5567fc86-d3f8-4ef7-94d8-12fc28eeb9b4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.2-3B-Esper2/1762652579.947128",
-    "retrieved_timestamp": "1762652579.9471302",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ValiantLabs/Llama3.2-3B-Esper2",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ValiantLabs/Llama3.2-3B-Esper2"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.27497484452364174
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.38082611390366106
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.03625377643504532
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2701342281879195
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3549583333333333
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.22573138297872342
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "float16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 3.213
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.2-3B-ShiningValiant2/6c3a0d11-d421-4420-9df7-359164a85893.json b/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.2-3B-ShiningValiant2/6c3a0d11-d421-4420-9df7-359164a85893.json
deleted file mode 100644
index b9cdfa25c07d8d2ca1777a2c9a9546b488bb68c7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ValiantLabs_Llama3.2-3B-ShiningValiant2/6c3a0d11-d421-4420-9df7-359164a85893.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ValiantLabs_Llama3.2-3B-ShiningValiant2/1762652579.947389",
-    "retrieved_timestamp": "1762652579.9473898",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ValiantLabs/Llama3.2-3B-ShiningValiant2",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ValiantLabs/Llama3.2-3B-ShiningValiant2"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2625101397624968
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.42259325337870185
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.0823262839879154
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2802013422818792
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.38664583333333336
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.28291223404255317
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "float16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 3.213
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/Weyaxi_Einstein-v6.1-Llama3-8B/13c07664-1ff1-48a4-a43d-877fc05bd19d.json b/leaderboard_data/HFOpenLLMv2/meta/Weyaxi_Einstein-v6.1-Llama3-8B/13c07664-1ff1-48a4-a43d-877fc05bd19d.json
deleted file mode 100644
index 5c46e273860789d2d3fb05ef602ac3e1d44f499d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/Weyaxi_Einstein-v6.1-Llama3-8B/13c07664-1ff1-48a4-a43d-877fc05bd19d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v6.1-Llama3-8B/1762652579.9489238",
-    "retrieved_timestamp": "1762652579.948925",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "Weyaxi/Einstein-v6.1-Llama3-8B",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "Weyaxi/Einstein-v6.1-Llama3-8B"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4568245588372186
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5008295581095018
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.06797583081570997
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.28187919463087246
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.42128125
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3130817819148936
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/Weyaxi_Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/06985382-8aec-4aa3-85ff-774da25ed2d3.json b/leaderboard_data/HFOpenLLMv2/meta/Weyaxi_Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/06985382-8aec-4aa3-85ff-774da25ed2d3.json
deleted file mode 100644
index 8ea9c3de6d889942cead6f6ac024db925e3c404b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/Weyaxi_Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/06985382-8aec-4aa3-85ff-774da25ed2d3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v6.1-developed-by-Weyaxi-Llama3-8B/1762652579.9492018",
-    "retrieved_timestamp": "1762652579.949203",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.39270247388041507
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5043837450549643
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.07175226586102719
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.27348993288590606
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.43324999999999997
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.30925864361702127
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/Weyaxi_Einstein-v8-Llama3.2-1B/5edf6193-a8d6-41d3-b2fd-20f7ce537770.json b/leaderboard_data/HFOpenLLMv2/meta/Weyaxi_Einstein-v8-Llama3.2-1B/5edf6193-a8d6-41d3-b2fd-20f7ce537770.json
deleted file mode 100644
index 42d78e7abdcdefad79be0d0cb5a3d676d829e20e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/Weyaxi_Einstein-v8-Llama3.2-1B/5edf6193-a8d6-41d3-b2fd-20f7ce537770.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v8-Llama3.2-1B/1762652579.9499211",
-    "retrieved_timestamp": "1762652579.949922",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "Weyaxi/Einstein-v8-Llama3.2-1B",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "Weyaxi/Einstein-v8-Llama3.2-1B"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.18622255615101263
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.30184334823943154
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.0007552870090634441
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.25838926174496646
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.36178125
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.11610704787234043
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 1.236
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/Xiaojian9992024_Llama3.2-1B-THREADRIPPER-v0.2/5ae4b63d-a84b-4468-aefe-8b5c7b88323e.json b/leaderboard_data/HFOpenLLMv2/meta/Xiaojian9992024_Llama3.2-1B-THREADRIPPER-v0.2/5ae4b63d-a84b-4468-aefe-8b5c7b88323e.json
deleted file mode 100644
index 7cf568095d5c4def645d1c181782442fb9ead631..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/Xiaojian9992024_Llama3.2-1B-THREADRIPPER-v0.2/5ae4b63d-a84b-4468-aefe-8b5c7b88323e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Llama3.2-1B-THREADRIPPER-v0.2/1762652579.952687",
-    "retrieved_timestamp": "1762652579.9526882",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5317878783849076
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3527816493941946
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.06570996978851963
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.26593959731543626
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.33164583333333336
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.1745345744680851
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 1.236
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/Xiaojian9992024_Llama3.2-1B-THREADRIPPER/b7c71bb9-0f3b-4d2f-8902-5fefac1629c5.json b/leaderboard_data/HFOpenLLMv2/meta/Xiaojian9992024_Llama3.2-1B-THREADRIPPER/b7c71bb9-0f3b-4d2f-8902-5fefac1629c5.json
deleted file mode 100644
index c0602b9d528028fea772bcc11a8a0d899d1f8463..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/Xiaojian9992024_Llama3.2-1B-THREADRIPPER/b7c71bb9-0f3b-4d2f-8902-5fefac1629c5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Llama3.2-1B-THREADRIPPER/1762652579.952322",
-    "retrieved_timestamp": "1762652579.952322",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5575916346405316
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.35437497890840614
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.07401812688821752
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2609060402684564
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.31297916666666664
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.17627992021276595
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 1.236
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/Xkev_Llama-3.2V-11B-cot/55f777f4-460f-4b83-a309-7e9e9113fd55.json b/leaderboard_data/HFOpenLLMv2/meta/Xkev_Llama-3.2V-11B-cot/55f777f4-460f-4b83-a309-7e9e9113fd55.json
deleted file mode 100644
index d53baa335445225d3af5deafa6269c4b9b14b677..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/Xkev_Llama-3.2V-11B-cot/55f777f4-460f-4b83-a309-7e9e9113fd55.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/Xkev_Llama-3.2V-11B-cot/1762652579.9552681",
-    "retrieved_timestamp": "1762652579.955269",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "Xkev/Llama-3.2V-11B-cot",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "Xkev/Llama-3.2V-11B-cot"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.41580894249480266
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.495871783411897
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.1555891238670695
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2953020134228188
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.4158541666666667
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.35871010638297873
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "float16",
-        "architecture": "MllamaForConditionalGeneration",
-        "params_billions": 10.67
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/Yuma42_Llama3.1-IgneousIguana-8B/cd2f97bc-3f4d-43f2-b100-09eec8d122a6.json b/leaderboard_data/HFOpenLLMv2/meta/Yuma42_Llama3.1-IgneousIguana-8B/cd2f97bc-3f4d-43f2-b100-09eec8d122a6.json
deleted file mode 100644
index f2b46a34fc3509130fbe68d2e26d7b2011e9ddee..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/Yuma42_Llama3.1-IgneousIguana-8B/cd2f97bc-3f4d-43f2-b100-09eec8d122a6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/Yuma42_Llama3.1-IgneousIguana-8B/1762652579.965119",
-    "retrieved_timestamp": "1762652579.965119",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "Yuma42/Llama3.1-IgneousIguana-8B",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "Yuma42/Llama3.1-IgneousIguana-8B"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.8133297428600558
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5190512670457804
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.21978851963746224
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3104026845637584
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.42026041666666664
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.39735704787234044
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/Yuma42_Llama3.1-SuperHawk-8B/458dd163-075e-48ca-bb3b-650912f55696.json b/leaderboard_data/HFOpenLLMv2/meta/Yuma42_Llama3.1-SuperHawk-8B/458dd163-075e-48ca-bb3b-650912f55696.json
deleted file mode 100644
index 716f2e2d2707b5b30e32f85bf409128b40a2d7ae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/Yuma42_Llama3.1-SuperHawk-8B/458dd163-075e-48ca-bb3b-650912f55696.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/Yuma42_Llama3.1-SuperHawk-8B/1762652579.965369",
-    "retrieved_timestamp": "1762652579.9653702",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "Yuma42/Llama3.1-SuperHawk-8B",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "Yuma42/Llama3.1-SuperHawk-8B"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.7986420475449585
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5199931545260023
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2348942598187311
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.31291946308724833
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.40835416666666663
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.39453125
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ZeroXClem_Llama-3.1-8B-AthenaSky-MegaMix/2c35754b-3763-4098-8686-39694028e0d9.json b/leaderboard_data/HFOpenLLMv2/meta/ZeroXClem_Llama-3.1-8B-AthenaSky-MegaMix/2c35754b-3763-4098-8686-39694028e0d9.json
deleted file mode 100644
index 019a8a7c810489a4bd4c69a470a823b1fd892445..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ZeroXClem_Llama-3.1-8B-AthenaSky-MegaMix/2c35754b-3763-4098-8686-39694028e0d9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-AthenaSky-MegaMix/1762652579.966579",
-    "retrieved_timestamp": "1762652579.96658",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.63008151704145
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5163423288466883
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.2794561933534743
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.27768456375838924
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.35384375
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.3503989361702128
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ZeroXClem_Llama-3.1-8B-RainbowLight-EtherealMix/18072fb3-a27a-4ad7-93ef-a3770637a0dc.json b/leaderboard_data/HFOpenLLMv2/meta/ZeroXClem_Llama-3.1-8B-RainbowLight-EtherealMix/18072fb3-a27a-4ad7-93ef-a3770637a0dc.json
deleted file mode 100644
index a54390a0b0b1a012acf5b8170f8355f5bda0fa9d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ZeroXClem_Llama-3.1-8B-RainbowLight-EtherealMix/18072fb3-a27a-4ad7-93ef-a3770637a0dc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-RainbowLight-EtherealMix/1762652579.96684",
-    "retrieved_timestamp": "1762652579.966841",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.49734149833552754
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5154785280029148
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.1216012084592145
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.28691275167785235
-            }
-        },
-        {
-            "evaluation_name": "MUSR",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MUSR",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.39470833333333327
-            }
-        },
-        {
-            "evaluation_name": "MMLU-PRO",
-            "metric_config": {
-                "evaluation_description": "Accuracy on MMLU-PRO",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.363031914893617
-            }
-        }
-    ],
-    "additional_details": {
-        "precision": "bfloat16",
-        "architecture": "LlamaForCausalLM",
-        "params_billions": 8.03
-    }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/meta/ZeroXClem_Llama-3.1-8B-SpecialTitanFusion/38be33eb-3dfb-4987-a2f0-14ceb9d834f7.json b/leaderboard_data/HFOpenLLMv2/meta/ZeroXClem_Llama-3.1-8B-SpecialTitanFusion/38be33eb-3dfb-4987-a2f0-14ceb9d834f7.json
deleted file mode 100644
index 317cd27c49460fa94f742ed805787c7cf7e74d67..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/meta/ZeroXClem_Llama-3.1-8B-SpecialTitanFusion/38be33eb-3dfb-4987-a2f0-14ceb9d834f7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-    "schema_version": "0.0.1",
-    "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-SpecialTitanFusion/1762652579.967058",
-    "retrieved_timestamp": "1762652579.967059",
-    "source_data": [
-        "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-    ],
-    "evaluation_source": {
-        "evaluation_source_name": "HF Open LLM v2",
-        "evaluation_source_type": "leaderboard"
-    },
-    "source_metadata": {
-        "source_organization_name": "Hugging Face",
-        "evaluator_relationship": "third_party"
-    },
-    "model_info": {
-        "name": "ZeroXClem/Llama-3.1-8B-SpecialTitanFusion",
-        "developer": "meta",
-        "inference_platform": "unknown",
-        "id": "ZeroXClem/Llama-3.1-8B-SpecialTitanFusion"
-    },
-    "evaluation_results": [
-        {
-            "evaluation_name": "IFEval",
-            "metric_config": {
-                "evaluation_description": "Accuracy on IFEval",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.7402403400754443
-            }
-        },
-        {
-            "evaluation_name": "BBH",
-            "metric_config": {
-                "evaluation_description": "Accuracy on BBH",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.5438928349489152
-            }
-        },
-        {
-            "evaluation_name": "MATH Level 5",
-            "metric_config": {
-                "evaluation_description": "Exact Match on MATH Level 5",
-                "lower_is_better": false,
-                "score_type": "continuous",
-                "min_score": 0,
-                "max_score": 1
-            },
-            "score_details": {
-                "score": 0.23338368580060423
-            }
-        },
-        {
-            "evaluation_name": "GPQA",
-            "metric_config": {
-                "evaluation_description": "Accuracy on GPQA",
-                "lower_is_better": false,
-
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38739583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3621176861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/ZeroXClem_Llama-3.1-8B-SuperNova-EtherealHermes/1007d3aa-f8ca-420c-b974-a0f552c649ac.json b/leaderboard_data/HFOpenLLMv2/meta/ZeroXClem_Llama-3.1-8B-SuperNova-EtherealHermes/1007d3aa-f8ca-420c-b974-a0f552c649ac.json deleted file mode 100644 index 36d81d1fb06bebdc5de7ffc7a92ccd6fa04b61c6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/ZeroXClem_Llama-3.1-8B-SuperNova-EtherealHermes/1007d3aa-f8ca-420c-b974-a0f552c649ac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-SuperNova-EtherealHermes/1762652579.967272", - "retrieved_timestamp": "1762652579.967272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes", - "developer": "meta", - "inference_platform": "unknown", - "id": "ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7338705745200512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244464882599044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17447129909365558 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4065833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37450132978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/ZeroXClem_Llama-3.1-8B-SuperTulu-LexiNova/ba3564f4-f48f-4548-ae15-b5f78c4b44f4.json b/leaderboard_data/HFOpenLLMv2/meta/ZeroXClem_Llama-3.1-8B-SuperTulu-LexiNova/ba3564f4-f48f-4548-ae15-b5f78c4b44f4.json deleted file mode 100644 index 25686b3dc9d23138f500dbd507ff40f26436ceb4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/ZeroXClem_Llama-3.1-8B-SuperTulu-LexiNova/ba3564f4-f48f-4548-ae15-b5f78c4b44f4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ZeroXClem_Llama-3.1-8B-SuperTulu-LexiNova/1762652579.96749", - "retrieved_timestamp": "1762652579.9674911", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova", - "developer": "meta", - "inference_platform": "unknown", - "id": "ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4164583305629064 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5078595074869328 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25302114803625375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39706249999999993 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3367686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/aaditya_Llama3-OpenBioLLM-70B/e68ae3f7-3f46-43bb-8e14-0523af96998e.json b/leaderboard_data/HFOpenLLMv2/meta/aaditya_Llama3-OpenBioLLM-70B/e68ae3f7-3f46-43bb-8e14-0523af96998e.json deleted file 
mode 100644 index c0f59e191a653c9dc4cad665cfc1779722e4f09f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/aaditya_Llama3-OpenBioLLM-70B/e68ae3f7-3f46-43bb-8e14-0523af96998e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/aaditya_Llama3-OpenBioLLM-70B/1762652579.969287", - "retrieved_timestamp": "1762652579.9692879", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "aaditya/Llama3-OpenBioLLM-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "aaditya/Llama3-OpenBioLLM-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7596743307756753 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6398872375485518 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1971299093655589 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44171875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4867021276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/abacusai_Llama-3-Smaug-8B/ea57e277-5694-4981-ac47-d2fa633847ca.json b/leaderboard_data/HFOpenLLMv2/meta/abacusai_Llama-3-Smaug-8B/ea57e277-5694-4981-ac47-d2fa633847ca.json deleted file mode 100644 index f2c0d715b59c55d7aa6ad1688b9473c4ef1ef6ea..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/abacusai_Llama-3-Smaug-8B/ea57e277-5694-4981-ac47-d2fa633847ca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/abacusai_Llama-3-Smaug-8B/1762652579.9700851", - "retrieved_timestamp": "1762652579.9700859", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abacusai/Llama-3-Smaug-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "abacusai/Llama-3-Smaug-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48667535472546175 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4930712769667174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36224999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3184840425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/abhishek_autotrain-llama3-70b-orpo-v1/eb2ee4fb-cc98-4937-a385-19a5e783d1a7.json b/leaderboard_data/HFOpenLLMv2/meta/abhishek_autotrain-llama3-70b-orpo-v1/eb2ee4fb-cc98-4937-a385-19a5e783d1a7.json deleted file mode 100644 index 6d480b76df3452791d3d10cf1264b4dec30f571c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/abhishek_autotrain-llama3-70b-orpo-v1/eb2ee4fb-cc98-4937-a385-19a5e783d1a7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/abhishek_autotrain-llama3-70b-orpo-v1/1762652579.973002", - "retrieved_timestamp": "1762652579.973003", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abhishek/autotrain-llama3-70b-orpo-v1", - "developer": "meta", - "inference_platform": "unknown", - "id": "abhishek/autotrain-llama3-70b-orpo-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4233023932055834 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5997985900252331 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35790625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11220079787234043 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/abhishek_autotrain-llama3-70b-orpo-v2/15617903-e280-4c61-a326-5f615b46b3a8.json b/leaderboard_data/HFOpenLLMv2/meta/abhishek_autotrain-llama3-70b-orpo-v2/15617903-e280-4c61-a326-5f615b46b3a8.json deleted file mode 100644 index 6a53a9d9ad635a2693269085610f7b0bb172ca39..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/abhishek_autotrain-llama3-70b-orpo-v2/15617903-e280-4c61-a326-5f615b46b3a8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/abhishek_autotrain-llama3-70b-orpo-v2/1762652579.9732742", - "retrieved_timestamp": "1762652579.973275", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abhishek/autotrain-llama3-70b-orpo-v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "abhishek/autotrain-llama3-70b-orpo-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5406055931594835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5899473641612185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2107250755287009 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41133333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48179853723404253 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/abhishek_autotrain-llama3-orpo-v2/f8515d35-c7e8-440b-a61f-16f5acfdc003.json b/leaderboard_data/HFOpenLLMv2/meta/abhishek_autotrain-llama3-orpo-v2/f8515d35-c7e8-440b-a61f-16f5acfdc003.json deleted file mode 100644 index d8212bad38994715e73f42881c5c8420de6bfbb6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/abhishek_autotrain-llama3-orpo-v2/f8515d35-c7e8-440b-a61f-16f5acfdc003.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/abhishek_autotrain-llama3-orpo-v2/1762652579.9735", - "retrieved_timestamp": "1762652579.973501", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abhishek/autotrain-llama3-orpo-v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "abhishek/autotrain-llama3-orpo-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371656094717572 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31593828880846425 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3792395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22182513297872342 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/agentlans_Llama3.1-8B-drill/869f9850-417b-43d7-bb40-61375a8bb09c.json b/leaderboard_data/HFOpenLLMv2/meta/agentlans_Llama3.1-8B-drill/869f9850-417b-43d7-bb40-61375a8bb09c.json deleted file mode 100644 index bda25520af4c009180235991e8a0404dc13aef2b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/agentlans_Llama3.1-8B-drill/869f9850-417b-43d7-bb40-61375a8bb09c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-8B-drill/1762652579.976306", - "retrieved_timestamp": "1762652579.976307", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "agentlans/Llama3.1-8B-drill", - "developer": "meta", - "inference_platform": "unknown", - "id": "agentlans/Llama3.1-8B-drill" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.765169749597734 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5015680021795333 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1714501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36723958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37757646276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/agentlans_Llama3.1-Daredevilish/417b2c35-090e-42c3-8a92-04f7258702a3.json b/leaderboard_data/HFOpenLLMv2/meta/agentlans_Llama3.1-Daredevilish/417b2c35-090e-42c3-8a92-04f7258702a3.json deleted file mode 100644 index 
81e8ae7fbd2a63934c55ac4378dd9bd887aadd0a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/agentlans_Llama3.1-Daredevilish/417b2c35-090e-42c3-8a92-04f7258702a3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-Daredevilish/1762652579.976594", - "retrieved_timestamp": "1762652579.976595", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "agentlans/Llama3.1-Daredevilish", - "developer": "meta", - "inference_platform": "unknown", - "id": "agentlans/Llama3.1-Daredevilish" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6291573026237051 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5012506630648397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40909375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3696808510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/agentlans_Llama3.1-LexiHermes-SuperStorm/6f966179-a456-4914-807d-45ab507e0388.json b/leaderboard_data/HFOpenLLMv2/meta/agentlans_Llama3.1-LexiHermes-SuperStorm/6f966179-a456-4914-807d-45ab507e0388.json deleted file mode 100644 index 5ad561941f81d92f950f12cb237c322987d7c0c4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/agentlans_Llama3.1-LexiHermes-SuperStorm/6f966179-a456-4914-807d-45ab507e0388.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-LexiHermes-SuperStorm/1762652579.97705", - "retrieved_timestamp": "1762652579.9770508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "agentlans/Llama3.1-LexiHermes-SuperStorm", - "developer": "meta", - "inference_platform": "unknown", - "id": "agentlans/Llama3.1-LexiHermes-SuperStorm" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7834545672149895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5266460888159817 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16163141993957703 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3962604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3843916223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/agentlans_Llama3.1-SuperDeepFuse-CrashCourse12K/455bd496-7a32-45c9-a792-3982781fdc16.json b/leaderboard_data/HFOpenLLMv2/meta/agentlans_Llama3.1-SuperDeepFuse-CrashCourse12K/455bd496-7a32-45c9-a792-3982781fdc16.json deleted file mode 100644 index 1fa2d64c7380862e4c0caf863968273c57d3c21e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/agentlans_Llama3.1-SuperDeepFuse-CrashCourse12K/455bd496-7a32-45c9-a792-3982781fdc16.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-SuperDeepFuse-CrashCourse12K/1762652579.977621", - "retrieved_timestamp": "1762652579.977621", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K", - "developer": "meta", - "inference_platform": "unknown", - "id": "agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.718732961874493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5215513828266275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18051359516616314 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40264583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3631150265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/agentlans_Llama3.1-SuperDeepFuse/6301252b-2353-438a-9e60-c6a572adfc5f.json b/leaderboard_data/HFOpenLLMv2/meta/agentlans_Llama3.1-SuperDeepFuse/6301252b-2353-438a-9e60-c6a572adfc5f.json deleted file mode 100644 index 92c9c717381ce65c31e8a103a517eae02f81d246..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/agentlans_Llama3.1-SuperDeepFuse/6301252b-2353-438a-9e60-c6a572adfc5f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-SuperDeepFuse/1762652579.977348", - "retrieved_timestamp": "1762652579.97735", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "agentlans/Llama3.1-SuperDeepFuse", - "developer": "meta", - "inference_platform": "unknown", - "id": "agentlans/Llama3.1-SuperDeepFuse" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7761605872418517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048544889908054 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.369875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3774933510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/ahmeda335_13_outOf_32_pruned_layers_llama3.1-8b/54da4a97-6e12-4bb0-9138-dacd981b04bf.json b/leaderboard_data/HFOpenLLMv2/meta/ahmeda335_13_outOf_32_pruned_layers_llama3.1-8b/54da4a97-6e12-4bb0-9138-dacd981b04bf.json deleted file mode 100644 index e3f29fc07d6f5d5e99f8f6616462cc911fe7dacf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/ahmeda335_13_outOf_32_pruned_layers_llama3.1-8b/54da4a97-6e12-4bb0-9138-dacd981b04bf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ahmeda335_13_outOf_32_pruned_layers_llama3.1-8b/1762652579.97824", - "retrieved_timestamp": "1762652579.978241", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17480728910402177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2883257760266153 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.3803229166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11286569148936171 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 5.195 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/akhadangi_Llama3.2.1B.0.01-First/d07eada4-e73c-4dd6-8538-e3a9cd471f34.json b/leaderboard_data/HFOpenLLMv2/meta/akhadangi_Llama3.2.1B.0.01-First/d07eada4-e73c-4dd6-8538-e3a9cd471f34.json deleted file mode 100644 index f6f4d3c8fab388c755d3fcede61751188bc4d669..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/akhadangi_Llama3.2.1B.0.01-First/d07eada4-e73c-4dd6-8538-e3a9cd471f34.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.01-First/1762652579.979876", - "retrieved_timestamp": "1762652579.979876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "akhadangi/Llama3.2.1B.0.01-First", - "developer": "meta", - "inference_platform": "unknown", - "id": "akhadangi/Llama3.2.1B.0.01-First" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08135857303066973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31891926453372005 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3193958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1196808510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/akhadangi_Llama3.2.1B.0.01-Last/9f796e5e-6c31-46e0-b839-e21d33a403c4.json 
b/leaderboard_data/HFOpenLLMv2/meta/akhadangi_Llama3.2.1B.0.01-Last/9f796e5e-6c31-46e0-b839-e21d33a403c4.json deleted file mode 100644 index 320fe11906ad86193727d90665db535dbfd4e77a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/akhadangi_Llama3.2.1B.0.01-Last/9f796e5e-6c31-46e0-b839-e21d33a403c4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.01-Last/1762652579.980133", - "retrieved_timestamp": "1762652579.9801338", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "akhadangi/Llama3.2.1B.0.01-Last", - "developer": "meta", - "inference_platform": "unknown", - "id": "akhadangi/Llama3.2.1B.0.01-Last" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09165015492227291 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3159283874883156 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3206354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12267287234042554 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/akhadangi_Llama3.2.1B.0.1-First/4ec306d4-3f34-4330-9898-fb5ccb9a3483.json b/leaderboard_data/HFOpenLLMv2/meta/akhadangi_Llama3.2.1B.0.1-First/4ec306d4-3f34-4330-9898-fb5ccb9a3483.json deleted file mode 100644 index 5c40571a35f9e2817683c8057358c2822b35d2e2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/akhadangi_Llama3.2.1B.0.1-First/4ec306d4-3f34-4330-9898-fb5ccb9a3483.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.1-First/1762652579.9803479", - "retrieved_timestamp": "1762652579.9803488", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "akhadangi/Llama3.2.1B.0.1-First", - "developer": "meta", - "inference_platform": "unknown", - "id": "akhadangi/Llama3.2.1B.0.1-First" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10009330797838623 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3119615016336897 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.330125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11693816489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/akhadangi_Llama3.2.1B.0.1-Last/82c24fd7-de74-4dc8-bd22-5761243ed826.json b/leaderboard_data/HFOpenLLMv2/meta/akhadangi_Llama3.2.1B.0.1-Last/82c24fd7-de74-4dc8-bd22-5761243ed826.json deleted file mode 100644 index 294727f6eec68b515a456215766db269badf1724..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/akhadangi_Llama3.2.1B.0.1-Last/82c24fd7-de74-4dc8-bd22-5761243ed826.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.0.1-Last/1762652579.980555", - "retrieved_timestamp": "1762652579.980556", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "akhadangi/Llama3.2.1B.0.1-Last", - "developer": "meta", - "inference_platform": "unknown", - "id": "akhadangi/Llama3.2.1B.0.1-Last" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09497245087479 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3163776768490709 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23825503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3340625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11776928191489362 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/akhadangi_Llama3.2.1B.BaseFiT/8577766f-d696-489d-8194-31b48c17941a.json b/leaderboard_data/HFOpenLLMv2/meta/akhadangi_Llama3.2.1B.BaseFiT/8577766f-d696-489d-8194-31b48c17941a.json deleted file mode 100644 index daec7b160d327468674d2c5d34b88fe040973318..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/akhadangi_Llama3.2.1B.BaseFiT/8577766f-d696-489d-8194-31b48c17941a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/akhadangi_Llama3.2.1B.BaseFiT/1762652579.980761", - "retrieved_timestamp": "1762652579.980762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "akhadangi/Llama3.2.1B.BaseFiT", - "developer": "meta", - "inference_platform": "unknown", - "id": "akhadangi/Llama3.2.1B.BaseFiT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08827799128534511 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31745151457535453 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3220625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1171875 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/akjindal53244_Llama-3.1-Storm-8B/de2d2321-b6ed-4791-9114-757afc963876.json b/leaderboard_data/HFOpenLLMv2/meta/akjindal53244_Llama-3.1-Storm-8B/de2d2321-b6ed-4791-9114-757afc963876.json deleted file mode 100644 index 776bb8c9bff80ee6078887cde7e5f5679618f6c0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/akjindal53244_Llama-3.1-Storm-8B/de2d2321-b6ed-4791-9114-757afc963876.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/akjindal53244_Llama-3.1-Storm-8B/1762652579.981211", - "retrieved_timestamp": "1762652579.981212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "akjindal53244/Llama-3.1-Storm-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "akjindal53244/Llama-3.1-Storm-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8050616807847621 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5188671226840744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17220543806646524 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4028020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3803191489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/akjindal53244_Llama-3.1-Storm-8B/f9aad6f2-ba24-47de-a613-b4011a2c52d1.json b/leaderboard_data/HFOpenLLMv2/meta/akjindal53244_Llama-3.1-Storm-8B/f9aad6f2-ba24-47de-a613-b4011a2c52d1.json deleted file mode 100644 index dc0c8dcebed30bdc724700345ef3a4786300a7f6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/akjindal53244_Llama-3.1-Storm-8B/f9aad6f2-ba24-47de-a613-b4011a2c52d1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/akjindal53244_Llama-3.1-Storm-8B/1762652579.980961", - "retrieved_timestamp": "1762652579.980962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "akjindal53244/Llama-3.1-Storm-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "akjindal53244/Llama-3.1-Storm-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.803263119633683 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5196330402870707 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1623867069486405 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4028333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3812333776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/alcholjung_llama3_medical_tuned/30324407-0848-48ae-bbd7-80676d9467db.json 
b/leaderboard_data/HFOpenLLMv2/meta/alcholjung_llama3_medical_tuned/30324407-0848-48ae-bbd7-80676d9467db.json deleted file mode 100644 index be6619f0002708d92b0d124151ad3d8afd6d4779..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/alcholjung_llama3_medical_tuned/30324407-0848-48ae-bbd7-80676d9467db.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/alcholjung_llama3_medical_tuned/1762652579.9813929", - "retrieved_timestamp": "1762652579.9813938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "alcholjung/llama3_medical_tuned", - "developer": "meta", - "inference_platform": "unknown", - "id": "alcholjung/llama3_medical_tuned" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010566408241244343 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4512943191660926 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46602083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29463098404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 16.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/allenai_Llama-3.1-Tulu-3-70B/006cafcb-452f-4df0-b42c-058719eb63e4.json b/leaderboard_data/HFOpenLLMv2/meta/allenai_Llama-3.1-Tulu-3-70B/006cafcb-452f-4df0-b42c-058719eb63e4.json deleted file mode 100644 index 0570457205db314f8bef3003c87dcfddebbcdaf0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/allenai_Llama-3.1-Tulu-3-70B/006cafcb-452f-4df0-b42c-058719eb63e4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B/1762652579.981659", - "retrieved_timestamp": "1762652579.981659", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8291167435737177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6163626496199947 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4501510574018127 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4948333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46451130319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/allenai_Llama-3.1-Tulu-3-70B/5683ed15-2699-4f0c-8e74-a65ff2d4dd49.json b/leaderboard_data/HFOpenLLMv2/meta/allenai_Llama-3.1-Tulu-3-70B/5683ed15-2699-4f0c-8e74-a65ff2d4dd49.json deleted file mode 100644 index 4d5e56626aed148f471061464e3d0ea4dfc88383..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/allenai_Llama-3.1-Tulu-3-70B/5683ed15-2699-4f0c-8e74-a65ff2d4dd49.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B/1762652579.981919", - "retrieved_timestamp": "1762652579.981919", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8379344583482937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6156847169556112 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49880208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4655917553191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/allenai_Llama-3.1-Tulu-3-8B-RM/1a363aad-a1e7-404e-8c4a-4132f4fbab2b.json b/leaderboard_data/HFOpenLLMv2/meta/allenai_Llama-3.1-Tulu-3-8B-RM/1a363aad-a1e7-404e-8c4a-4132f4fbab2b.json deleted file mode 100644 index 53dfb524af3af585da99b66479073c69308dc60d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/allenai_Llama-3.1-Tulu-3-8B-RM/1a363aad-a1e7-404e-8c4a-4132f4fbab2b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B-RM/1762652579.9831831", - "retrieved_timestamp": "1762652579.9831831", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-8B-RM", - "developer": "meta", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-8B-RM" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16701352411601217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2950041147470504 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3764166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10821143617021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForSequenceClassification", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/allenai_Llama-3.1-Tulu-3-8B/5ad18861-1b4d-456d-9e1c-e945c1f71530.json b/leaderboard_data/HFOpenLLMv2/meta/allenai_Llama-3.1-Tulu-3-8B/5ad18861-1b4d-456d-9e1c-e945c1f71530.json deleted file mode 100644 index 52d7186f279c31c559f545c46f128101e839287c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/allenai_Llama-3.1-Tulu-3-8B/5ad18861-1b4d-456d-9e1c-e945c1f71530.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B/1762652579.9825459", - "retrieved_timestamp": "1762652579.982547", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8266687943545348 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4049833102731906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2826628989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/allenai_Llama-3.1-Tulu-3-8B/8a7c4b5a-85c7-4fc6-af4c-e9cde5d32d8b.json b/leaderboard_data/HFOpenLLMv2/meta/allenai_Llama-3.1-Tulu-3-8B/8a7c4b5a-85c7-4fc6-af4c-e9cde5d32d8b.json deleted file mode 100644 index 00d3f6bd103e5b677e9e919f26df0e839eec8f92..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/allenai_Llama-3.1-Tulu-3-8B/8a7c4b5a-85c7-4fc6-af4c-e9cde5d32d8b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B/1762652579.982752", - "retrieved_timestamp": "1762652579.982752", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allenai/Llama-3.1-Tulu-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "allenai/Llama-3.1-Tulu-3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8254697535871487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40608256120952024 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41746875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2820811170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/allknowingroger_Llama3.1-60B/21684c0e-c9b7-4375-bf05-cf63e9bd19b4.json b/leaderboard_data/HFOpenLLMv2/meta/allknowingroger_Llama3.1-60B/21684c0e-c9b7-4375-bf05-cf63e9bd19b4.json deleted file mode 100644 index 
54aa866618ff7d86db96ef5d2ebd1c709242f395..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/allknowingroger_Llama3.1-60B/21684c0e-c9b7-4375-bf05-cf63e9bd19b4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Llama3.1-60B/1762652579.989347", - "retrieved_timestamp": "1762652579.9893482", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Llama3.1-60B", - "developer": "meta", - "inference_platform": "unknown", - "id": "allknowingroger/Llama3.1-60B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18145188100905596 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32417552719382076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3595833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3310339095744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 61.997 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/allknowingroger_Yillama-40B/ab5ef6c9-76de-470e-b524-497036db94d4.json b/leaderboard_data/HFOpenLLMv2/meta/allknowingroger_Yillama-40B/ab5ef6c9-76de-470e-b524-497036db94d4.json deleted file mode 100644 index bf67cf0a48d31b7ac45222bb4a42718db0ebb129..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/allknowingroger_Yillama-40B/ab5ef6c9-76de-470e-b524-497036db94d4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Yillama-40B/1762652580.004728", - "retrieved_timestamp": "1762652580.004729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Yillama-40B", - "developer": "meta", - "inference_platform": "unknown", - "id": "allknowingroger/Yillama-40B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16968643200042555 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40628855371888356 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3500625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1981382978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/allknowingroger_llama3-Jallabi-40B-s/d46307f8-774b-4871-a32a-6c5a9cc6b1b8.json b/leaderboard_data/HFOpenLLMv2/meta/allknowingroger_llama3-Jallabi-40B-s/d46307f8-774b-4871-a32a-6c5a9cc6b1b8.json deleted file mode 100644 index 6d5e63687e43dd9f4c1bfa8eae1688667e2e6e02..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/allknowingroger_llama3-Jallabi-40B-s/d46307f8-774b-4871-a32a-6c5a9cc6b1b8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_llama3-Jallabi-40B-s/1762652580.006197", - "retrieved_timestamp": "1762652580.006198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/llama3-Jallabi-40B-s", - "developer": "meta", - "inference_platform": "unknown", - "id": "allknowingroger/llama3-Jallabi-40B-s" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19206815693471102 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32522424198526295 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23741610738255034 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37495833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10879321808510638 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 18.769 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/allknowingroger_llama3AnFeng-40B/dc25bda9-966c-44f8-991b-ad891d59befe.json b/leaderboard_data/HFOpenLLMv2/meta/allknowingroger_llama3AnFeng-40B/dc25bda9-966c-44f8-991b-ad891d59befe.json deleted file mode 100644 index de53ead453d6b2b03b587494b383548e99d35d9e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/allknowingroger_llama3AnFeng-40B/dc25bda9-966c-44f8-991b-ad891d59befe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_llama3AnFeng-40B/1762652580.006448", - "retrieved_timestamp": "1762652580.006449", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/llama3AnFeng-40B", - "developer": "meta", - "inference_platform": "unknown", - "id": "allknowingroger/llama3AnFeng-40B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17420776872032873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794080447660335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39399999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1979720744680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 39.971 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/aloobun_Meta-Llama-3-7B-28Layers/f020ec4e-f026-4034-a219-1aacfcbb16b0.json b/leaderboard_data/HFOpenLLMv2/meta/aloobun_Meta-Llama-3-7B-28Layers/f020ec4e-f026-4034-a219-1aacfcbb16b0.json deleted file mode 100644 index 8dbaed181b60246b685b5e361b6e5ed2de37523d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/aloobun_Meta-Llama-3-7B-28Layers/f020ec4e-f026-4034-a219-1aacfcbb16b0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/aloobun_Meta-Llama-3-7B-28Layers/1762652580.0090299", - "retrieved_timestamp": "1762652580.0090308", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "aloobun/Meta-Llama-3-7B-28Layers", - "developer": "meta", - "inference_platform": "unknown", - "id": "aloobun/Meta-Llama-3-7B-28Layers" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19636453498938372 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4437497014253391 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35892708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3159906914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.158 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/amd_AMD-Llama-135m/086ca0cf-79a3-4b94-980d-9384f1848562.json b/leaderboard_data/HFOpenLLMv2/meta/amd_AMD-Llama-135m/086ca0cf-79a3-4b94-980d-9384f1848562.json deleted file mode 100644 index 9f786d394f889d8790abdde77a03f1193a0f11a5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/amd_AMD-Llama-135m/086ca0cf-79a3-4b94-980d-9384f1848562.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/amd_AMD-Llama-135m/1762652580.010782", - "retrieved_timestamp": "1762652580.010783", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "amd/AMD-Llama-135m", - "developer": "meta", - "inference_platform": "unknown", - "id": "amd/AMD-Llama-135m" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184319826948054 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29694449748780255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38457291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11685505319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.134 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/amd_AMD-Llama-135m/4a623195-2073-4637-b748-696012109846.json b/leaderboard_data/HFOpenLLMv2/meta/amd_AMD-Llama-135m/4a623195-2073-4637-b748-696012109846.json deleted file mode 100644 index d8c8cb4d510ab3ac1431d2f43c75edfe66ec1641..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/amd_AMD-Llama-135m/4a623195-2073-4637-b748-696012109846.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/amd_AMD-Llama-135m/1762652580.010537", - "retrieved_timestamp": "1762652580.010538", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "amd/AMD-Llama-135m", - "developer": "meta", - "inference_platform": "unknown", - "id": "amd/AMD-Llama-135m" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18422452426229072 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2973931917569524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37796874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11685505319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/arcee-ai_Llama-3.1-SuperNova-Lite/4bc80120-a5e2-4824-b278-c2de7140a2bf.json b/leaderboard_data/HFOpenLLMv2/meta/arcee-ai_Llama-3.1-SuperNova-Lite/4bc80120-a5e2-4824-b278-c2de7140a2bf.json deleted file mode 100644 index 9b73f51d68db0fb25dcc9d2e0710a64bd7ae6dd7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/arcee-ai_Llama-3.1-SuperNova-Lite/4bc80120-a5e2-4824-b278-c2de7140a2bf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/arcee-ai_Llama-3.1-SuperNova-Lite/1762652580.016114", - "retrieved_timestamp": "1762652580.016115", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "arcee-ai/Llama-3.1-SuperNova-Lite", - "developer": "meta", - "inference_platform": "unknown", - "id": 
"arcee-ai/Llama-3.1-SuperNova-Lite" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8017393848322452 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5151992115104819 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41632291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3877160904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/arcee-ai_Llama-Spark/aaceb35d-4106-4d6c-b895-446b87394f3b.json b/leaderboard_data/HFOpenLLMv2/meta/arcee-ai_Llama-Spark/aaceb35d-4106-4d6c-b895-446b87394f3b.json deleted file mode 100644 index a3e7b60c5551d3bdbaec70a33356585d805ad562..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/arcee-ai_Llama-Spark/aaceb35d-4106-4d6c-b895-446b87394f3b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/arcee-ai_Llama-Spark/1762652580.0163891", - "retrieved_timestamp": "1762652580.0163898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "arcee-ai/Llama-Spark", - "developer": "meta", - "inference_platform": "unknown", - "id": "arcee-ai/Llama-Spark" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7910732412221794 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5053504145749979 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13897280966767372 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35933333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3720910904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/argilla-warehouse_Llama-3.1-8B-MagPie-Ultra/4e4260dc-81e0-4e2f-a7ce-dd6a0f7e0796.json b/leaderboard_data/HFOpenLLMv2/meta/argilla-warehouse_Llama-3.1-8B-MagPie-Ultra/4e4260dc-81e0-4e2f-a7ce-dd6a0f7e0796.json deleted file mode 100644 index 1ca168b87f51617e73b1b8a8086ab9a7e7c2d5ed..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/argilla-warehouse_Llama-3.1-8B-MagPie-Ultra/4e4260dc-81e0-4e2f-a7ce-dd6a0f7e0796.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/argilla-warehouse_Llama-3.1-8B-MagPie-Ultra/1762652580.018188", - "retrieved_timestamp": "1762652580.018189", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "argilla-warehouse/Llama-3.1-8B-MagPie-Ultra", - "developer": "meta", - "inference_platform": "unknown", - "id": "argilla-warehouse/Llama-3.1-8B-MagPie-Ultra" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5756514935925566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46196134634468616 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35425 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31441156914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/asharsha30_LLAMA_Harsha_8_B_ORDP_10k/61523c37-faee-4708-be49-4c7e31d760e6.json b/leaderboard_data/HFOpenLLMv2/meta/asharsha30_LLAMA_Harsha_8_B_ORDP_10k/61523c37-faee-4708-be49-4c7e31d760e6.json deleted file mode 100644 index 36448adb235c1433fe5ea055faa7b0decc593e01..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/asharsha30_LLAMA_Harsha_8_B_ORDP_10k/61523c37-faee-4708-be49-4c7e31d760e6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/asharsha30_LLAMA_Harsha_8_B_ORDP_10k/1762652580.01895", - "retrieved_timestamp": "1762652580.018951", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "asharsha30/LLAMA_Harsha_8_B_ORDP_10k", - "developer": "meta", - "inference_platform": "unknown", - "id": "asharsha30/LLAMA_Harsha_8_B_ORDP_10k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34639090945358314 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4668707690948544 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36965625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.281000664893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ 
No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bfuzzy1_acheron-m1a-llama/da59bcfb-1f9a-41e5-9a8c-14f672dce595.json b/leaderboard_data/HFOpenLLMv2/meta/bfuzzy1_acheron-m1a-llama/da59bcfb-1f9a-41e5-9a8c-14f672dce595.json deleted file mode 100644 index 0fc3bba3dced9d8984c8f8519d1a9c6e7c6b265f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bfuzzy1_acheron-m1a-llama/da59bcfb-1f9a-41e5-9a8c-14f672dce595.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-m1a-llama/1762652580.0322502", - "retrieved_timestamp": "1762652580.032251", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bfuzzy1/acheron-m1a-llama", - "developer": "meta", - "inference_platform": "unknown", - "id": "bfuzzy1/acheron-m1a-llama" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11245827737070972 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29560475093811295 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36330208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11461103723404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.514 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bosonai_Higgs-Llama-3-70B/ebac2d72-ef36-43a7-83de-e28ae3eb4b22.json b/leaderboard_data/HFOpenLLMv2/meta/bosonai_Higgs-Llama-3-70B/ebac2d72-ef36-43a7-83de-e28ae3eb4b22.json deleted file mode 100644 index b33a164c5cd061bb04a03df757665a5b1cd6ed69..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bosonai_Higgs-Llama-3-70B/ebac2d72-ef36-43a7-83de-e28ae3eb4b22.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bosonai_Higgs-Llama-3-70B/1762652580.035682", - "retrieved_timestamp": 
"1762652580.035682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bosonai/Higgs-Llama-3-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "bosonai/Higgs-Llama-3-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5560678998390935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.625765879603832 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25226586102719034 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36661073825503354 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44708333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49019281914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Best-Mix-Llama-3.1-8B/ee1e13fe-2ec6-4ce8-8d32-1fe011b12ef8.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Best-Mix-Llama-3.1-8B/ee1e13fe-2ec6-4ce8-8d32-1fe011b12ef8.json deleted file mode 100644 index d9dcd487ac9465de1b45d5324ec25bcfb482cf82..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Best-Mix-Llama-3.1-8B/ee1e13fe-2ec6-4ce8-8d32-1fe011b12ef8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Best-Mix-Llama-3.1-8B/1762652580.0419252", - "retrieved_timestamp": "1762652580.041926", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Best-Mix-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Best-Mix-Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20670598456539757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.343178100574048 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2928541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15649933510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_HyperLlama-3.1-8B/7d031f11-6623-40c0-96bd-b3f0c135600b.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_HyperLlama-3.1-8B/7d031f11-6623-40c0-96bd-b3f0c135600b.json deleted file mode 100644 index 24dcd21dbe6393ae69cdd2c2263d5f02c858bb06..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_HyperLlama-3.1-8B/7d031f11-6623-40c0-96bd-b3f0c135600b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_HyperLlama-3.1-8B/1762652580.045207", - "retrieved_timestamp": "1762652580.045208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/HyperLlama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/HyperLlama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7883005979689446 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5103385292046213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38292708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783244680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.1-8B-TitanFusion-Mix/5b0421b6-04ff-422c-a13e-9649306959d4.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.1-8B-TitanFusion-Mix/5b0421b6-04ff-422c-a13e-9649306959d4.json deleted file mode 100644 index 9b839b0d85750a61d42218f69809df9acfedd492..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.1-8B-TitanFusion-Mix/5b0421b6-04ff-422c-a13e-9649306959d4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.1-8B-TitanFusion-Mix/1762652580.045413", - "retrieved_timestamp": "1762652580.045414", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.1-8B-TitanFusion-Mix", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.1-8B-TitanFusion-Mix" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4924954675815725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5755964197928182 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.4316979166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3695146276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.1-8B-TitanFusion-v3/6ee91c1c-b44e-44a9-b4b2-4e3cbeb594d3.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.1-8B-TitanFusion-v3/6ee91c1c-b44e-44a9-b4b2-4e3cbeb594d3.json deleted file mode 100644 index 967572d7e8e16203dc2bce8072cd765b3279e16f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.1-8B-TitanFusion-v3/6ee91c1c-b44e-44a9-b4b2-4e3cbeb594d3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.1-8B-TitanFusion-v3/1762652580.045624", - "retrieved_timestamp": "1762652580.045625", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.1-8B-TitanFusion-v3", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.1-8B-TitanFusion-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4809549772381725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5262113071794826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1419939577039275 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4302083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38056848404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-All-Mix/60766e3b-e153-4ee8-8615-1c1e68b7cd75.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-All-Mix/60766e3b-e153-4ee8-8615-1c1e68b7cd75.json deleted file mode 100644 index 658608ba903f1eab6938a474a0d16d6df4ecd593..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-All-Mix/60766e3b-e153-4ee8-8615-1c1e68b7cd75.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-All-Mix/1762652580.045842", - "retrieved_timestamp": "1762652580.045843", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-All-Mix", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-All-Mix" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7226049105262924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45083384652782293 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15030211480362538 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32869791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3159906914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Bespoke-Thought/b43702d0-eef7-42d8-87b9-c1cbd0edb417.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Bespoke-Thought/b43702d0-eef7-42d8-87b9-c1cbd0edb417.json deleted file mode 100644 index af8e84fdcc94aa9550726cdaacbae26530abcfda..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Bespoke-Thought/b43702d0-eef7-42d8-87b9-c1cbd0edb417.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/bunnycore_Llama-3.2-3B-Bespoke-Thought/1762652580.046056", - "retrieved_timestamp": "1762652580.046057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-Bespoke-Thought", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-Bespoke-Thought" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4112621178473118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45217398665008424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33025 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31100398936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Booval/9cb855b6-e141-492a-99fb-98858d76f66c.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Booval/9cb855b6-e141-492a-99fb-98858d76f66c.json deleted file mode 100644 index a27907dd93d71d8bdbb429b35bd988200d00d58e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Booval/9cb855b6-e141-492a-99fb-98858d76f66c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Booval/1762652580.046278", - "retrieved_timestamp": "1762652580.046279", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-Booval", - "developer": "meta", - "inference_platform": "unknown", - "id": 
"bunnycore/Llama-3.2-3B-Booval" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6669259786256023 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45143904014934083 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3394270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30576795212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Deep-Test/76edae8d-f4d3-41b2-8a24-cc676feed31c.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Deep-Test/76edae8d-f4d3-41b2-8a24-cc676feed31c.json deleted file mode 100644 index 1eb4d6112524b8c74ddc73ef4238efa585a9018d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Deep-Test/76edae8d-f4d3-41b2-8a24-cc676feed31c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Deep-Test/1762652580.046704", - "retrieved_timestamp": "1762652580.046706", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-Deep-Test", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-Deep-Test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46516797652451053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4530851376077318 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33939583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3152426861702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Deep-Test/f150ea9d-0e4a-49c7-aa12-a703ca011755.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Deep-Test/f150ea9d-0e4a-49c7-aa12-a703ca011755.json deleted file mode 100644 index 615ccc7bfb3c7aaa9bd015e993cb1bd09b54acf2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Deep-Test/f150ea9d-0e4a-49c7-aa12-a703ca011755.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Deep-Test/1762652580.046481", - "retrieved_timestamp": "1762652580.046481", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-Deep-Test", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-Deep-Test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17753006467284582 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29502574011260374 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10488696808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.803 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Della/8c23bcaf-2753-4f60-85ec-e92a48b8bba3.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Della/8c23bcaf-2753-4f60-85ec-e92a48b8bba3.json deleted file mode 100644 index a782997741574e772595e7f80e5d4ef394df7cd1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Della/8c23bcaf-2753-4f60-85ec-e92a48b8bba3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Della/1762652580.0469692", - "retrieved_timestamp": "1762652580.0469701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-Della", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-Della" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35608297096149333 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36834936417932634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39015625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21284906914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Long-Think/bf24dc90-551e-4e0d-8525-9b3b8c4ccfe1.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Long-Think/bf24dc90-551e-4e0d-8525-9b3b8c4ccfe1.json deleted file mode 100644 index 5e62ed1b11d766487782003e68c917549e5cdcb0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Long-Think/bf24dc90-551e-4e0d-8525-9b3b8c4ccfe1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-Long-Think/1762652580.047193", - "retrieved_timestamp": "1762652580.047194", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-Long-Think", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-Long-Think" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5473499204333391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4610394542442049 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14577039274924472 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33955208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30477061170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Mix-Skill/7a6d897c-0efe-4c18-808c-25f6b9a78b5d.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Mix-Skill/7a6d897c-0efe-4c18-808c-25f6b9a78b5d.json deleted file mode 100644 index ca3f6de2010926b919a72100b1b11c2ab3f37fa0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-Mix-Skill/7a6d897c-0efe-4c18-808c-25f6b9a78b5d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/bunnycore_Llama-3.2-3B-Mix-Skill/1762652580.047411", - "retrieved_timestamp": "1762652580.047412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-Mix-Skill", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-Mix-Skill" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6404229666174639 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45818358891543803 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1472809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33961458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3120844414893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-ProdigyPlus/0ef3d0a9-a3e9-4b33-bece-bd7eec82514d.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-ProdigyPlus/0ef3d0a9-a3e9-4b33-bece-bd7eec82514d.json deleted file mode 100644 index c048aa9e930c8860509d85cc2e6dc696c7edaaf4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-ProdigyPlus/0ef3d0a9-a3e9-4b33-bece-bd7eec82514d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-ProdigyPlus/1762652580.047628", - "retrieved_timestamp": "1762652580.047629", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-ProdigyPlus", - "developer": "meta", - "inference_platform": "unknown", - "id": 
"bunnycore/Llama-3.2-3B-ProdigyPlus" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40152018865499095 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4392279045834126 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35800000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28174867021276595 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-ProdigyPlusPlus/485d4a25-810a-4022-828b-15c255fa2004.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-ProdigyPlusPlus/485d4a25-810a-4022-828b-15c255fa2004.json deleted file mode 100644 index bf7ddc34afd550b47d53649a8dfa94f2664dfd3f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-ProdigyPlusPlus/485d4a25-810a-4022-828b-15c255fa2004.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-ProdigyPlusPlus/1762652580.047838", - "retrieved_timestamp": "1762652580.047839", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-ProdigyPlusPlus", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-ProdigyPlusPlus" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1645157072124186 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3689926047041594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.354125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15001662234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-RP-DeepThink/d24cf761-7c11-4f9b-9e41-ca24ac1225b9.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-RP-DeepThink/d24cf761-7c11-4f9b-9e41-ca24ac1225b9.json deleted file mode 100644 index 293f2d3d35f5acd14184c480e1742f63d45af64c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-RP-DeepThink/d24cf761-7c11-4f9b-9e41-ca24ac1225b9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-RP-DeepThink/1762652580.048058", - "retrieved_timestamp": "1762652580.048059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-RP-DeepThink", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-RP-DeepThink" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7143867161354096 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45625632795830356 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1608761329305136 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32421875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-RRStock/f1af1d33-fb95-462d-830c-5330d6481b6a.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-RRStock/f1af1d33-fb95-462d-830c-5330d6481b6a.json deleted file mode 100644 index 5ea29ac496dd03a4293f5573c04f0d329b476c96..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-RRStock/f1af1d33-fb95-462d-830c-5330d6481b6a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-RRStock/1762652580.048298", - "retrieved_timestamp": "1762652580.048298", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-RRStock", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-RRStock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6657269378582162 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45676937648721455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16993957703927492 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3314270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32355385638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } -} 
\ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-ToxicKod/d59a73eb-0aee-49f8-abce-6500f1fae79d.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-ToxicKod/d59a73eb-0aee-49f8-abce-6500f1fae79d.json deleted file mode 100644 index dac21048033583bee3e56a84cfe9359bda5b726c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3B-ToxicKod/d59a73eb-0aee-49f8-abce-6500f1fae79d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Llama-3.2-3B-ToxicKod/1762652580.0485172", - "retrieved_timestamp": "1762652580.048518", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3B-ToxicKod", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3B-ToxicKod" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6319299458769398 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4525429005077621 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16993957703927492 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34745833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28798204787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3b-RP-Toxic-Fuse/4c2bc39c-2d04-4afd-a94d-bc8f59e75755.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3b-RP-Toxic-Fuse/4c2bc39c-2d04-4afd-a94d-bc8f59e75755.json deleted file mode 100644 index 3883788e6b247cbf0e57b29458a363863f0f78f0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Llama-3.2-3b-RP-Toxic-Fuse/4c2bc39c-2d04-4afd-a94d-bc8f59e75755.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/bunnycore_Llama-3.2-3b-RP-Toxic-Fuse/1762652580.048726", - "retrieved_timestamp": "1762652580.048727", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Llama-3.2-3b-RP-Toxic-Fuse", - "developer": "meta", - "inference_platform": "unknown", - "id": "bunnycore/Llama-3.2-3b-RP-Toxic-Fuse" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.683362367407368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46497242330684924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24018126888217523 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3953645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31058843085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Smol-Llama-3.2-3B/eed01a32-3282-40c9-9a6c-9a0eae79fc8e.json b/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Smol-Llama-3.2-3B/eed01a32-3282-40c9-9a6c-9a0eae79fc8e.json deleted file mode 100644 index 389ee28c37003aeb915977c157e4b0dd64e6ebb6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/bunnycore_Smol-Llama-3.2-3B/eed01a32-3282-40c9-9a6c-9a0eae79fc8e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Smol-Llama-3.2-3B/1762652580.061756", - "retrieved_timestamp": "1762652580.0617611", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Smol-Llama-3.2-3B", - "developer": "meta", - "inference_platform": "unknown", - "id": 
"bunnycore/Smol-Llama-3.2-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6678501930433471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.453881406940321 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34600000000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3228058510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.607 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/chargoddard_prometheus-2-llama-3-8b/ea26b157-81d0-4aa2-a6df-d1d391ab2a3b.json b/leaderboard_data/HFOpenLLMv2/meta/chargoddard_prometheus-2-llama-3-8b/ea26b157-81d0-4aa2-a6df-d1d391ab2a3b.json deleted file mode 100644 index 42afdf28a834ab73c440054b62ecc5dd17e11349..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/chargoddard_prometheus-2-llama-3-8b/ea26b157-81d0-4aa2-a6df-d1d391ab2a3b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/chargoddard_prometheus-2-llama-3-8b/1762652580.100514", - "retrieved_timestamp": "1762652580.100516", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "chargoddard/prometheus-2-llama-3-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "chargoddard/prometheus-2-llama-3-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5288900118352637 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4931144581470071 - } - }, - { 
- "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0823262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33958333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30867686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/cloudyu_Llama-3-70Bx2-MOE/8d0fa497-cdaa-4206-ae80-babed3089d43.json b/leaderboard_data/HFOpenLLMv2/meta/cloudyu_Llama-3-70Bx2-MOE/8d0fa497-cdaa-4206-ae80-babed3089d43.json deleted file mode 100644 index 5eb950946395f5e8935c68a46a1f1dac968cd2c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/cloudyu_Llama-3-70Bx2-MOE/8d0fa497-cdaa-4206-ae80-babed3089d43.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cloudyu_Llama-3-70Bx2-MOE/1762652580.10177", - "retrieved_timestamp": "1762652580.101771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cloudyu/Llama-3-70Bx2-MOE", - "developer": "meta", - "inference_platform": "unknown", - "id": "cloudyu/Llama-3-70Bx2-MOE" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5482486469234964 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6636234572270707 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48118750000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5142121010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 126.926 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/cloudyu_Llama-3.2-3Bx4/0f4eaf10-0a2d-48e7-9c22-e1c771da16a0.json b/leaderboard_data/HFOpenLLMv2/meta/cloudyu_Llama-3.2-3Bx4/0f4eaf10-0a2d-48e7-9c22-e1c771da16a0.json deleted file mode 100644 index 7fb2937d252d19a2c52b580f508b9225d968a47e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/cloudyu_Llama-3.2-3Bx4/0f4eaf10-0a2d-48e7-9c22-e1c771da16a0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cloudyu_Llama-3.2-3Bx4/1762652580.102047", - "retrieved_timestamp": "1762652580.102048", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cloudyu/Llama-3.2-3Bx4", - "developer": "meta", - "inference_platform": "unknown", - "id": "cloudyu/Llama-3.2-3Bx4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5068584688626179 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43321946547659324 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3495625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29853723404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 9.949 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/meta/cloudyu_S1-Llama-3.2-3Bx4-MoE/4cd18600-a389-4a22-88f8-0e35739665bb.json b/leaderboard_data/HFOpenLLMv2/meta/cloudyu_S1-Llama-3.2-3Bx4-MoE/4cd18600-a389-4a22-88f8-0e35739665bb.json deleted file mode 100644 index 625463392d945821e3cb2590faa5e864d35445a9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/cloudyu_S1-Llama-3.2-3Bx4-MoE/4cd18600-a389-4a22-88f8-0e35739665bb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cloudyu_S1-Llama-3.2-3Bx4-MoE/1762652580.103262", - "retrieved_timestamp": "1762652580.103263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cloudyu/S1-Llama-3.2-3Bx4-MoE", - "developer": "meta", - "inference_platform": "unknown", - "id": "cloudyu/S1-Llama-3.2-3Bx4-MoE" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.530214275899059 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43578925882973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.345625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30435505319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 9.555 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/cluebbers_Llama-3.1-8B-paraphrase-type-generation-apty-ipo/e89bbd89-f8fa-4156-94d8-6f390a383557.json b/leaderboard_data/HFOpenLLMv2/meta/cluebbers_Llama-3.1-8B-paraphrase-type-generation-apty-ipo/e89bbd89-f8fa-4156-94d8-6f390a383557.json deleted file mode 100644 index 54a260ca3c70a2609541558945a812c32d6d86a1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/cluebbers_Llama-3.1-8B-paraphrase-type-generation-apty-ipo/e89bbd89-f8fa-4156-94d8-6f390a383557.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/cluebbers_Llama-3.1-8B-paraphrase-type-generation-apty-ipo/1762652580.109549", - "retrieved_timestamp": "1762652580.1095521", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo", - "developer": "meta", - "inference_platform": "unknown", - "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1326668794354535 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800219303191354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43321875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2590591755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/cluebbers_Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/f7aec62a-004e-4034-b4d9-152452bb519a.json b/leaderboard_data/HFOpenLLMv2/meta/cluebbers_Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/f7aec62a-004e-4034-b4d9-152452bb519a.json deleted file mode 100644 index cfcc73890246e1c67184adcff78a49714920bfe0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/cluebbers_Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/f7aec62a-004e-4034-b4d9-152452bb519a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cluebbers_Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid/1762652580.110752", - "retrieved_timestamp": "1762652580.110753", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid", - "developer": "meta", - "inference_platform": "unknown", - "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13184240038652995 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37889016032903705 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43055208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2562333776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/cluebbers_Llama-3.1-8B-paraphrase-type-generation-etpc/dbec72eb-bef2-4985-9ac6-bf5c6dabc25c.json b/leaderboard_data/HFOpenLLMv2/meta/cluebbers_Llama-3.1-8B-paraphrase-type-generation-etpc/dbec72eb-bef2-4985-9ac6-bf5c6dabc25c.json deleted file mode 100644 index 3c97761d882e88a0c4b10394ecbc0414b8695f9c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/cluebbers_Llama-3.1-8B-paraphrase-type-generation-etpc/dbec72eb-bef2-4985-9ac6-bf5c6dabc25c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cluebbers_Llama-3.1-8B-paraphrase-type-generation-etpc/1762652580.1111748", - "retrieved_timestamp": "1762652580.111176", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc", - "developer": "meta", - "inference_platform": "unknown", - "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12085156274241235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3780811415223316 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43185416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25556848404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/cognitivecomputations_Dolphin3.0-Llama3.1-8B/fa439482-ca9c-49c3-9732-1147c3965c56.json b/leaderboard_data/HFOpenLLMv2/meta/cognitivecomputations_Dolphin3.0-Llama3.1-8B/fa439482-ca9c-49c3-9732-1147c3965c56.json deleted file mode 100644 index e02490fa28af88f0538027a8fa1bec660de49d6a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/cognitivecomputations_Dolphin3.0-Llama3.1-8B/fa439482-ca9c-49c3-9732-1147c3965c56.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-Llama3.1-8B/1762652580.111501", - "retrieved_timestamp": "1762652580.1115022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/Dolphin3.0-Llama3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "cognitivecomputations/Dolphin3.0-Llama3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7621222799948582 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4916366353921198 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36534375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2992021276595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/cognitivecomputations_Dolphin3.0-Llama3.2-1B/0aecb893-2b9b-4cfd-bf97-b9887b0aa539.json b/leaderboard_data/HFOpenLLMv2/meta/cognitivecomputations_Dolphin3.0-Llama3.2-1B/0aecb893-2b9b-4cfd-bf97-b9887b0aa539.json deleted file mode 100644 index 8b5b5f818d1c66a691b5e758f78c7076e3f9e868..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/cognitivecomputations_Dolphin3.0-Llama3.2-1B/0aecb893-2b9b-4cfd-bf97-b9887b0aa539.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-Llama3.2-1B/1762652580.112042", - "retrieved_timestamp": "1762652580.112046", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/Dolphin3.0-Llama3.2-1B", - "developer": "meta", - "inference_platform": "unknown", - "id": "cognitivecomputations/Dolphin3.0-Llama3.2-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5427787160290252 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31222474255909144 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32488541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13754986702127658 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/cognitivecomputations_dolphin-2.9-llama3-8b/d985b9ab-a760-4a50-973e-6985e778b97d.json b/leaderboard_data/HFOpenLLMv2/meta/cognitivecomputations_dolphin-2.9-llama3-8b/d985b9ab-a760-4a50-973e-6985e778b97d.json deleted file mode 100644 index 3e51895eb5d2b423cfd5a933166b6ae9b4257237..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/cognitivecomputations_dolphin-2.9-llama3-8b/d985b9ab-a760-4a50-973e-6985e778b97d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9-llama3-8b/1762652580.113044", - "retrieved_timestamp": "1762652580.113045", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9-llama3-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9-llama3-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38503393218881454 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49499220166609187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.277094414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/meta/cognitivecomputations_dolphin-2.9.1-llama-3-70b/7c975279-f21e-418b-bc0b-739a933b91dc.json b/leaderboard_data/HFOpenLLMv2/meta/cognitivecomputations_dolphin-2.9.1-llama-3-70b/7c975279-f21e-418b-bc0b-739a933b91dc.json deleted file mode 100644 index e041917924642575f7b74fedc50c7c8a4c8d6c15..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/cognitivecomputations_dolphin-2.9.1-llama-3-70b/7c975279-f21e-418b-bc0b-739a933b91dc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.1-llama-3-70b/1762652580.113282", - "retrieved_timestamp": "1762652580.1132832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "developer": "meta", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.1-llama-3-70b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3760167466765959 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5204919312821467 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49756249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41298204787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/cognitivecomputations_dolphin-2.9.4-llama3.1-8b/d7da3f99-b538-4b33-a3dc-b2e4a96d3f89.json b/leaderboard_data/HFOpenLLMv2/meta/cognitivecomputations_dolphin-2.9.4-llama3.1-8b/d7da3f99-b538-4b33-a3dc-b2e4a96d3f89.json deleted file mode 100644 index 82af7f87c3dd9e0e4681438d38fcdd268f4d095f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/cognitivecomputations_dolphin-2.9.4-llama3.1-8b/d7da3f99-b538-4b33-a3dc-b2e4a96d3f89.json +++ /dev/null @@ 
-1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.4-llama3.1-8b/1762652580.1160939", - "retrieved_timestamp": "1762652580.116095", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.4-llama3.1-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.4-llama3.1-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27572396796056686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35236263850832567 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3236145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12367021276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/collaiborateorg_Collaiborator-MEDLLM-Llama-3-8B-v2/55eeee3c-b812-4359-ab5f-4e3fa976648f.json b/leaderboard_data/HFOpenLLMv2/meta/collaiborateorg_Collaiborator-MEDLLM-Llama-3-8B-v2/55eeee3c-b812-4359-ab5f-4e3fa976648f.json deleted file mode 100644 index ed8403626e45633771026ffc03ffd363afd19532..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/collaiborateorg_Collaiborator-MEDLLM-Llama-3-8B-v2/55eeee3c-b812-4359-ab5f-4e3fa976648f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/collaiborateorg_Collaiborator-MEDLLM-Llama-3-8B-v2/1762652580.116315", - "retrieved_timestamp": "1762652580.116315", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.380887157187374 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46480279544898967 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3434270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3480718085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/cpayne1303_llama-43m-beta/d79e4774-159d-4b47-8cc0-64d7844e7bfc.json b/leaderboard_data/HFOpenLLMv2/meta/cpayne1303_llama-43m-beta/d79e4774-159d-4b47-8cc0-64d7844e7bfc.json deleted file mode 100644 index c25c2a8fd418ea4b36d34e0a0d540ceb1a7e7c50..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/cpayne1303_llama-43m-beta/d79e4774-159d-4b47-8cc0-64d7844e7bfc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cpayne1303_llama-43m-beta/1762652580.117069", - "retrieved_timestamp": "1762652580.1170702", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cpayne1303/llama-43m-beta", - "developer": "meta", - "inference_platform": "unknown", - "id": "cpayne1303/llama-43m-beta" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19156837191983936 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29767781029884355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11319813829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.043 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/cpayne1303_llama-43m-beta/d987e61a-c7cc-4072-9e2c-faa6304eab65.json b/leaderboard_data/HFOpenLLMv2/meta/cpayne1303_llama-43m-beta/d987e61a-c7cc-4072-9e2c-faa6304eab65.json deleted file mode 100644 index 453bd1b9ef75421c2911b389f676b1ba8b0cdf3a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/cpayne1303_llama-43m-beta/d987e61a-c7cc-4072-9e2c-faa6304eab65.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cpayne1303_llama-43m-beta/1762652580.117342", - "retrieved_timestamp": "1762652580.117342", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cpayne1303/llama-43m-beta", - "developer": "meta", - "inference_platform": "unknown", - "id": "cpayne1303/llama-43m-beta" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19489066787235645 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29646319842669744 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3885416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.043 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/cstr_llama3.1-8b-spaetzle-v90/73270182-a54d-4fc5-834a-89283677c1af.json b/leaderboard_data/HFOpenLLMv2/meta/cstr_llama3.1-8b-spaetzle-v90/73270182-a54d-4fc5-834a-89283677c1af.json deleted file mode 100644 index 3560f7fb310173c01403904bbe91aa2f71adeb49..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/cstr_llama3.1-8b-spaetzle-v90/73270182-a54d-4fc5-834a-89283677c1af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cstr_llama3.1-8b-spaetzle-v90/1762652580.117986", - "retrieved_timestamp": "1762652580.1179872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cstr/llama3.1-8b-spaetzle-v90", - "developer": "meta", - "inference_platform": "unknown", - "id": "cstr/llama3.1-8b-spaetzle-v90" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7356192679867197 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302860633332208 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14954682779456194 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41343749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37308843085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", 
- "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/deepseek-ai_DeepSeek-R1-Distill-Llama-70B/8df04772-fc5c-4dfb-8366-f9844bf52a0e.json b/leaderboard_data/HFOpenLLMv2/meta/deepseek-ai_DeepSeek-R1-Distill-Llama-70B/8df04772-fc5c-4dfb-8366-f9844bf52a0e.json deleted file mode 100644 index a8036f1b447a080e29571c9fbb3db23902376ffe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/deepseek-ai_DeepSeek-R1-Distill-Llama-70B/8df04772-fc5c-4dfb-8366-f9844bf52a0e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Llama-70B/1762652580.121449", - "retrieved_timestamp": "1762652580.12145", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43359397509718656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5634962649702303 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3074018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43421875000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4748171542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/deepseek-ai_DeepSeek-R1-Distill-Llama-8B/650f54ba-4d43-4e31-92cd-16c7c1913b34.json b/leaderboard_data/HFOpenLLMv2/meta/deepseek-ai_DeepSeek-R1-Distill-Llama-8B/650f54ba-4d43-4e31-92cd-16c7c1913b34.json deleted file mode 100644 index bde88da94f7fac6ad59de3e41a8b23e6e25e03b1..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/deepseek-ai_DeepSeek-R1-Distill-Llama-8B/650f54ba-4d43-4e31-92cd-16c7c1913b34.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Llama-8B/1762652580.121731", - "retrieved_timestamp": "1762652580.121734", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37823973723054827 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.323935108539057 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21978851963746224 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32497916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20894281914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/dfurman_Llama-3-70B-Orpo-v0.1/854d263a-00cc-488a-83eb-c69bb74da5b5.json b/leaderboard_data/HFOpenLLMv2/meta/dfurman_Llama-3-70B-Orpo-v0.1/854d263a-00cc-488a-83eb-c69bb74da5b5.json deleted file mode 100644 index a083083c0de99a7af3190c73213f9720394ecb8e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/dfurman_Llama-3-70B-Orpo-v0.1/854d263a-00cc-488a-83eb-c69bb74da5b5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dfurman_Llama-3-70B-Orpo-v0.1/1762652580.124833", - "retrieved_timestamp": "1762652580.124834", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dfurman/Llama-3-70B-Orpo-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "dfurman/Llama-3-70B-Orpo-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20490742341431845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46552376347015506 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1578549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4534375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38929521276595747 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/dfurman_Llama-3-8B-Orpo-v0.1/0a6a3c2b-c0f5-44c7-9ac2-e278a303197e.json b/leaderboard_data/HFOpenLLMv2/meta/dfurman_Llama-3-8B-Orpo-v0.1/0a6a3c2b-c0f5-44c7-9ac2-e278a303197e.json deleted file mode 100644 index 775e8daacb8514677c591a32dd66fbdd6816ba57..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/dfurman_Llama-3-8B-Orpo-v0.1/0a6a3c2b-c0f5-44c7-9ac2-e278a303197e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dfurman_Llama-3-8B-Orpo-v0.1/1762652580.1253839", - "retrieved_timestamp": "1762652580.125385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dfurman/Llama-3-8B-Orpo-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "dfurman/Llama-3-8B-Orpo-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3000039894147528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy 
on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3852967582460245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.041540785498489434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.357875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22805851063829788 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/dfurman_Llama-3-8B-Orpo-v0.1/10047fc1-254f-406c-807c-3274d9780550.json b/leaderboard_data/HFOpenLLMv2/meta/dfurman_Llama-3-8B-Orpo-v0.1/10047fc1-254f-406c-807c-3274d9780550.json deleted file mode 100644 index 80865c01b0b35115226423e76fcf2167b817c5bd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/dfurman_Llama-3-8B-Orpo-v0.1/10047fc1-254f-406c-807c-3274d9780550.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dfurman_Llama-3-8B-Orpo-v0.1/1762652580.125153", - "retrieved_timestamp": "1762652580.125154", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dfurman/Llama-3-8B-Orpo-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "dfurman/Llama-3-8B-Orpo-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28351773294857646 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842420919898036 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3566354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22980385638297873 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/dnhkng_RYS-Llama3.1-Large/ca04e634-81e6-49fb-bdc4-2ff0ef04b75f.json b/leaderboard_data/HFOpenLLMv2/meta/dnhkng_RYS-Llama3.1-Large/ca04e634-81e6-49fb-bdc4-2ff0ef04b75f.json deleted file mode 100644 index d494299388e05e8712b7b7525ec50024ff81df35..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/dnhkng_RYS-Llama3.1-Large/ca04e634-81e6-49fb-bdc4-2ff0ef04b75f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama3.1-Large/1762652580.133179", - "retrieved_timestamp": "1762652580.1331809", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "dnhkng/RYS-Llama3.1-Large", - "developer": "meta", - "inference_platform": "unknown", - "id": "dnhkng/RYS-Llama3.1-Large" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8492001223420524 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6899112229777242 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3504531722054381 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4553958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5248503989361702 - } - } - ], - "additional_details": { - "precision": 
"bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 81.677 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/duyhv1411_Llama-3.2-1B-en-vi/000fcba9-c157-48de-b672-f583f4cd3881.json b/leaderboard_data/HFOpenLLMv2/meta/duyhv1411_Llama-3.2-1B-en-vi/000fcba9-c157-48de-b672-f583f4cd3881.json deleted file mode 100644 index 351c3657d1d3b6a9545d920810550886f9dfbe2a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/duyhv1411_Llama-3.2-1B-en-vi/000fcba9-c157-48de-b672-f583f4cd3881.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/duyhv1411_Llama-3.2-1B-en-vi/1762652580.1364539", - "retrieved_timestamp": "1762652580.1364548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "duyhv1411/Llama-3.2-1B-en-vi", - "developer": "meta", - "inference_platform": "unknown", - "id": "duyhv1411/Llama-3.2-1B-en-vi" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4788317220530415 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.329090872737918 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3197083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13414228723404256 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/duyhv1411_Llama-3.2-3B-en-vi/31381b9d-77fe-491d-891c-de4fd37fa1cd.json b/leaderboard_data/HFOpenLLMv2/meta/duyhv1411_Llama-3.2-3B-en-vi/31381b9d-77fe-491d-891c-de4fd37fa1cd.json deleted file mode 100644 index 9aa94fc195c97ee32ab63acf2ba907385a9008bd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/duyhv1411_Llama-3.2-3B-en-vi/31381b9d-77fe-491d-891c-de4fd37fa1cd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", 
- "evaluation_id": "hfopenllm_v2/duyhv1411_Llama-3.2-3B-en-vi/1762652580.136725", - "retrieved_timestamp": "1762652580.136726", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "duyhv1411/Llama-3.2-3B-en-vi", - "developer": "meta", - "inference_platform": "unknown", - "id": "duyhv1411/Llama-3.2-3B-en-vi" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4852014876084345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271639320986486 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3210104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13588763297872342 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/ehristoforu_HappyLlama1/07a29c73-e3f4-4f01-b105-ac1ef2fdff43.json b/leaderboard_data/HFOpenLLMv2/meta/ehristoforu_HappyLlama1/07a29c73-e3f4-4f01-b105-ac1ef2fdff43.json deleted file mode 100644 index cfdb226c211645406f90814e88fafc711bb51221..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/ehristoforu_HappyLlama1/07a29c73-e3f4-4f01-b105-ac1ef2fdff43.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_HappyLlama1/1762652580.139553", - "retrieved_timestamp": "1762652580.139554", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/HappyLlama1", - "developer": "meta", - "inference_platform": "unknown", - "id": "ehristoforu/HappyLlama1" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7362686560548235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49957323097428485 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14274924471299094 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42868749999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35455452127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/ehristoforu_mllama-3.1-8b-it/c4fa1166-5255-4b95-8c7b-e1f93265f126.json b/leaderboard_data/HFOpenLLMv2/meta/ehristoforu_mllama-3.1-8b-it/c4fa1166-5255-4b95-8c7b-e1f93265f126.json deleted file mode 100644 index 8b36e00a6d040bb14b3f0e8b67a78afe85ec9a42..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/ehristoforu_mllama-3.1-8b-it/c4fa1166-5255-4b95-8c7b-e1f93265f126.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_mllama-3.1-8b-it/1762652580.143829", - "retrieved_timestamp": "1762652580.14383", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/mllama-3.1-8b-it", - "developer": "meta", - "inference_platform": "unknown", - "id": "ehristoforu/mllama-3.1-8b-it" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38788193105404767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4868027039491969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37990936555891236 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3348645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26221742021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/flammenai_Llama3.1-Flammades-70B/92b8ecb7-80a2-4b77-bf20-8d87a36209c0.json b/leaderboard_data/HFOpenLLMv2/meta/flammenai_Llama3.1-Flammades-70B/92b8ecb7-80a2-4b77-bf20-8d87a36209c0.json deleted file mode 100644 index 501d35cc280966fb5eab3e5ca63a1dc11dfc4c7b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/flammenai_Llama3.1-Flammades-70B/92b8ecb7-80a2-4b77-bf20-8d87a36209c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/flammenai_Llama3.1-Flammades-70B/1762652580.154665", - "retrieved_timestamp": "1762652580.154666", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "flammenai/Llama3.1-Flammades-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "flammenai/Llama3.1-Flammades-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7058438277104748 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6659721866694542 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20921450151057402 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48705208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47523271276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/flammenai_Mahou-1.2a-llama3-8B/eb10ecab-2be4-4b75-9b85-d2f2786fd095.json b/leaderboard_data/HFOpenLLMv2/meta/flammenai_Mahou-1.2a-llama3-8B/eb10ecab-2be4-4b75-9b85-d2f2786fd095.json deleted file mode 100644 index 29fdd237404d540af9a5b26036ebef324c54ea5e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/flammenai_Mahou-1.2a-llama3-8B/eb10ecab-2be4-4b75-9b85-d2f2786fd095.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/flammenai_Mahou-1.2a-llama3-8B/1762652580.154922", - "retrieved_timestamp": "1762652580.154923", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "flammenai/Mahou-1.2a-llama3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "flammenai/Mahou-1.2a-llama3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50925655039739 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5093660540433169 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38466666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38173204787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/meta/flammenai_Mahou-1.5-llama3.1-70B/653ff1ac-158e-4d36-a813-22ebef4a76ce.json b/leaderboard_data/HFOpenLLMv2/meta/flammenai_Mahou-1.5-llama3.1-70B/653ff1ac-158e-4d36-a813-22ebef4a76ce.json deleted file mode 100644 index 17bbddf96e86523f65fd773fa4532f07dbdb00fa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/flammenai_Mahou-1.5-llama3.1-70B/653ff1ac-158e-4d36-a813-22ebef4a76ce.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/flammenai_Mahou-1.5-llama3.1-70B/1762652580.155493", - "retrieved_timestamp": "1762652580.155494", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "flammenai/Mahou-1.5-llama3.1-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "flammenai/Mahou-1.5-llama3.1-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7146615424850509 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6650860641288713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20996978851963746 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4950208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47490026595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/fluently-lm_Llama-TI-8B/63a32ad0-b871-437c-991a-342de8c13345.json b/leaderboard_data/HFOpenLLMv2/meta/fluently-lm_Llama-TI-8B/63a32ad0-b871-437c-991a-342de8c13345.json deleted file mode 100644 index 395468ad487f4947671dc862bbe8ba28adf4c70c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/fluently-lm_Llama-TI-8B/63a32ad0-b871-437c-991a-342de8c13345.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fluently-lm_Llama-TI-8B/1762652580.156513", - "retrieved_timestamp": 
"1762652580.156514", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fluently-lm/Llama-TI-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "fluently-lm/Llama-TI-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28803906966847964 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520085504155627 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4102708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.343999335106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/fulim_FineLlama-3.1-8B/46fa0a20-2810-4f0b-befe-afc3fc774734.json b/leaderboard_data/HFOpenLLMv2/meta/fulim_FineLlama-3.1-8B/46fa0a20-2810-4f0b-befe-afc3fc774734.json deleted file mode 100644 index fd86cdf17643ed364c9576ec92277ae297cac063..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/fulim_FineLlama-3.1-8B/46fa0a20-2810-4f0b-befe-afc3fc774734.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fulim_FineLlama-3.1-8B/1762652580.162704", - "retrieved_timestamp": "1762652580.162705", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fulim/FineLlama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "fulim/FineLlama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14388267574480157 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.456920741562608 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38673958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31673869680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/gbueno86_Brinebreath-Llama-3.1-70B/12e0e194-ef37-4da5-9354-e82f983fadb2.json b/leaderboard_data/HFOpenLLMv2/meta/gbueno86_Brinebreath-Llama-3.1-70B/12e0e194-ef37-4da5-9354-e82f983fadb2.json deleted file mode 100644 index 4ca54b912a2a9412811b9a016108aff5ca852db2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/gbueno86_Brinebreath-Llama-3.1-70B/12e0e194-ef37-4da5-9354-e82f983fadb2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gbueno86_Brinebreath-Llama-3.1-70B/1762652580.1638331", - "retrieved_timestamp": "1762652580.163834", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gbueno86/Brinebreath-Llama-3.1-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "gbueno86/Brinebreath-Llama-3.1-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5532952565858589 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6880562247706813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.297583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45406250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5196143617021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/gbueno86_Meta-LLama-3-Cat-Smaug-LLama-70b/9b7181ec-81f6-438a-8af6-a219f356f430.json b/leaderboard_data/HFOpenLLMv2/meta/gbueno86_Meta-LLama-3-Cat-Smaug-LLama-70b/9b7181ec-81f6-438a-8af6-a219f356f430.json deleted file mode 100644 index c420871c860cae232f3795eb598d778a43ec7039..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/gbueno86_Meta-LLama-3-Cat-Smaug-LLama-70b/9b7181ec-81f6-438a-8af6-a219f356f430.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gbueno86_Meta-LLama-3-Cat-Smaug-LLama-70b/1762652580.1641119", - "retrieved_timestamp": "1762652580.1641128", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b", - "developer": "meta", - "inference_platform": "unknown", - "id": "gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8071849359698933 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6674314931312052 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2938066465256798 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.43682291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5074800531914894 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/glaiveai_Reflection-Llama-3.1-70B/3e8ba765-d24b-4ffe-a816-21ea02b7ba14.json b/leaderboard_data/HFOpenLLMv2/meta/glaiveai_Reflection-Llama-3.1-70B/3e8ba765-d24b-4ffe-a816-21ea02b7ba14.json deleted file mode 100644 index 3c839cc27827e75601468faa0b5b444bcf7a3777..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/glaiveai_Reflection-Llama-3.1-70B/3e8ba765-d24b-4ffe-a816-21ea02b7ba14.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/glaiveai_Reflection-Llama-3.1-70B/1762652580.164674", - "retrieved_timestamp": "1762652580.164675", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "glaiveai/Reflection-Llama-3.1-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "glaiveai/Reflection-Llama-3.1-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5990571683134085 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5681010035620444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2756797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43803125000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6341422872340425 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 69.5 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/meta/gmonsoon_SahabatAI-Llama-11B-Test/48f5e083-9fa3-4753-a734-578ac3e15e1f.json b/leaderboard_data/HFOpenLLMv2/meta/gmonsoon_SahabatAI-Llama-11B-Test/48f5e083-9fa3-4753-a734-578ac3e15e1f.json deleted file mode 100644 index a460356893ff15e61e98abfe3c6110aab1aa2641..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/gmonsoon_SahabatAI-Llama-11B-Test/48f5e083-9fa3-4753-a734-578ac3e15e1f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gmonsoon_SahabatAI-Llama-11B-Test/1762652580.16498", - "retrieved_timestamp": "1762652580.164981", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gmonsoon/SahabatAI-Llama-11B-Test", - "developer": "meta", - "inference_platform": "unknown", - "id": "gmonsoon/SahabatAI-Llama-11B-Test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33757319467900726 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4727584153058988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40013541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3182347074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 11.52 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/grimjim_DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/f7439085-a0c9-4d5b-bd4f-bf1841d5ce02.json b/leaderboard_data/HFOpenLLMv2/meta/grimjim_DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/f7439085-a0c9-4d5b-bd4f-bf1841d5ce02.json deleted file mode 100644 index b20ef41e2048263d9ec87c1d92d2b58c558c3baa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/grimjim_DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/f7439085-a0c9-4d5b-bd4f-bf1841d5ce02.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/grimjim_DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B/1762652580.181649", - "retrieved_timestamp": "1762652580.18165", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4797060687863757 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5269400362212973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22205438066465258 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44078124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3956948138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/grimjim_HuatuoSkywork-o1-Llama-3.1-8B/6a173156-75b3-47f4-9f88-ecace0ee6942.json b/leaderboard_data/HFOpenLLMv2/meta/grimjim_HuatuoSkywork-o1-Llama-3.1-8B/6a173156-75b3-47f4-9f88-ecace0ee6942.json deleted file mode 100644 index e06429419cde5fe2000f0e1652aa68f0c941f029..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/grimjim_HuatuoSkywork-o1-Llama-3.1-8B/6a173156-75b3-47f4-9f88-ecace0ee6942.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_HuatuoSkywork-o1-Llama-3.1-8B/1762652580.182574", - "retrieved_timestamp": "1762652580.182574", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/HuatuoSkywork-o1-Llama-3.1-8B", - 
"developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/HuatuoSkywork-o1-Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3961499931293413 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48863582396592203 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38385416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30950797872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/grimjim_Llama-Nephilim-Metamorphosis-v2-8B/ac20706b-0370-47de-bc6b-ae188f8a9259.json b/leaderboard_data/HFOpenLLMv2/meta/grimjim_Llama-Nephilim-Metamorphosis-v2-8B/ac20706b-0370-47de-bc6b-ae188f8a9259.json deleted file mode 100644 index e95c1b10c51065d893dec2f23bbbd088cd4579f2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/grimjim_Llama-Nephilim-Metamorphosis-v2-8B/ac20706b-0370-47de-bc6b-ae188f8a9259.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Llama-Nephilim-Metamorphosis-v2-8B/1762652580.183682", - "retrieved_timestamp": "1762652580.1836832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Llama-Nephilim-Metamorphosis-v2-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/Llama-Nephilim-Metamorphosis-v2-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4544519652300341 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5013477378974034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40909375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38090093085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/grimjim_Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/f2fbc411-4a4b-4727-9fdc-eda481f4f10c.json b/leaderboard_data/HFOpenLLMv2/meta/grimjim_Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/f2fbc411-4a4b-4727-9fdc-eda481f4f10c.json deleted file mode 100644 index 844877ee300c8b64183ab381f9491fa70c49cf81..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/grimjim_Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/f2fbc411-4a4b-4727-9fdc-eda481f4f10c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B/1762652580.183897", - "retrieved_timestamp": "1762652580.183897", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43659157701565177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5287189378780882 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30060422960725075 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3998541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3683510638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/grimjim_SauerHuatuoSkywork-o1-Llama-3.1-8B/30482674-45a3-4400-84e0-eef215540eb5.json b/leaderboard_data/HFOpenLLMv2/meta/grimjim_SauerHuatuoSkywork-o1-Llama-3.1-8B/30482674-45a3-4400-84e0-eef215540eb5.json deleted file mode 100644 index cc44bc412c97ef6f34e9516b2bf761c949e78415..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/grimjim_SauerHuatuoSkywork-o1-Llama-3.1-8B/30482674-45a3-4400-84e0-eef215540eb5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_SauerHuatuoSkywork-o1-Llama-3.1-8B/1762652580.186095", - "retrieved_timestamp": "1762652580.1860962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5219462138237654 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5222077363554879 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45268749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39910239361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/grimjim_llama-3-Nephilim-v1-8B/498c4d5e-0500-42da-9c75-e8da578516f8.json b/leaderboard_data/HFOpenLLMv2/meta/grimjim_llama-3-Nephilim-v1-8B/498c4d5e-0500-42da-9c75-e8da578516f8.json deleted file mode 100644 index fe1db9e0ce2838bed12515fb4aa5376222fcbc30..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/grimjim_llama-3-Nephilim-v1-8B/498c4d5e-0500-42da-9c75-e8da578516f8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v1-8B/1762652580.186311", - "retrieved_timestamp": "1762652580.186312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/llama-3-Nephilim-v1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/llama-3-Nephilim-v1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4277239945566652 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5131817939007638 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41362499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37957114361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/grimjim_llama-3-Nephilim-v2-8B/de82dcd9-adae-4b28-8248-156e324e036d.json b/leaderboard_data/HFOpenLLMv2/meta/grimjim_llama-3-Nephilim-v2-8B/de82dcd9-adae-4b28-8248-156e324e036d.json deleted file mode 100644 index 
276f17cd05701f94ae7ffa08e023f47bef1cbcd0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/grimjim_llama-3-Nephilim-v2-8B/de82dcd9-adae-4b28-8248-156e324e036d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v2-8B/1762652580.186511", - "retrieved_timestamp": "1762652580.1865118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/llama-3-Nephilim-v2-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/llama-3-Nephilim-v2-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39222817679313116 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048214936442625 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3895 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3641123670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/grimjim_llama-3-Nephilim-v2.1-8B/df6327cf-82e1-437f-9c9a-c31205452717.json b/leaderboard_data/HFOpenLLMv2/meta/grimjim_llama-3-Nephilim-v2.1-8B/df6327cf-82e1-437f-9c9a-c31205452717.json deleted file mode 100644 index 6c2a4f6a12d2d7e50936deb52c63a74e6c8d6746..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/grimjim_llama-3-Nephilim-v2.1-8B/df6327cf-82e1-437f-9c9a-c31205452717.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v2.1-8B/1762652580.186715", - "retrieved_timestamp": "1762652580.186715", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, 
- "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/llama-3-Nephilim-v2.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/llama-3-Nephilim-v2.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38950540122430705 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5095042703104161 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3935 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3644448138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/grimjim_llama-3-Nephilim-v3-8B/ecee6e6a-15a1-4455-9724-34ca14477064.json b/leaderboard_data/HFOpenLLMv2/meta/grimjim_llama-3-Nephilim-v3-8B/ecee6e6a-15a1-4455-9724-34ca14477064.json deleted file mode 100644 index 5c23821689324e6ec73d2c4b40a7a20925edad1b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/grimjim_llama-3-Nephilim-v3-8B/ecee6e6a-15a1-4455-9724-34ca14477064.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/grimjim_llama-3-Nephilim-v3-8B/1762652580.186964", - "retrieved_timestamp": "1762652580.186965", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "grimjim/llama-3-Nephilim-v3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "grimjim/llama-3-Nephilim-v3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4173825449806513 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { 
- "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5012671264428366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3989270833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3612034574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/hotmailuser_Llama-Hermes-slerp-8B/cf2de222-77bf-456c-acb3-c3aa33367a9d.json b/leaderboard_data/HFOpenLLMv2/meta/hotmailuser_Llama-Hermes-slerp-8B/cf2de222-77bf-456c-acb3-c3aa33367a9d.json deleted file mode 100644 index 1f394c40255bef238fe07017b3cc02cf21e922d1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/hotmailuser_Llama-Hermes-slerp-8B/cf2de222-77bf-456c-acb3-c3aa33367a9d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_Llama-Hermes-slerp-8B/1762652580.1947231", - "retrieved_timestamp": "1762652580.194724", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/Llama-Hermes-slerp-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "hotmailuser/Llama-Hermes-slerp-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3390470617960345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5310290010444968 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33311170212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/hotmailuser_Llama-Hermes-slerp2-8B/be5505d7-06ae-4ab5-ba7f-6ff4732b3180.json b/leaderboard_data/HFOpenLLMv2/meta/hotmailuser_Llama-Hermes-slerp2-8B/be5505d7-06ae-4ab5-ba7f-6ff4732b3180.json deleted file mode 100644 index 1cb4537bb2a61808f451bc10f03332192c0c40fa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/hotmailuser_Llama-Hermes-slerp2-8B/be5505d7-06ae-4ab5-ba7f-6ff4732b3180.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_Llama-Hermes-slerp2-8B/1762652580.194975", - "retrieved_timestamp": "1762652580.194976", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/Llama-Hermes-slerp2-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "hotmailuser/Llama-Hermes-slerp2-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3728440537773109 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5265283171967207 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42481250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33793218085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/hotmailuser_LlamaStock-8B/23b559eb-4493-462f-bb37-5e232b3336bc.json b/leaderboard_data/HFOpenLLMv2/meta/hotmailuser_LlamaStock-8B/23b559eb-4493-462f-bb37-5e232b3336bc.json deleted file mode 100644 index 530d21743fa90c8390d93e92999b167193e56768..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/hotmailuser_LlamaStock-8B/23b559eb-4493-462f-bb37-5e232b3336bc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_LlamaStock-8B/1762652580.19518", - "retrieved_timestamp": "1762652580.19518", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/LlamaStock-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "hotmailuser/LlamaStock-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4249513513034304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5328942883826541 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16993957703927492 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41293749999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806515957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/huggyllama_llama-13b/20b49499-5df3-450c-a20d-dc421b937e91.json b/leaderboard_data/HFOpenLLMv2/meta/huggyllama_llama-13b/20b49499-5df3-450c-a20d-dc421b937e91.json deleted file mode 100644 index 51b536a1fc6a61e5c9db00388f9aea275e6a57a8..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/huggyllama_llama-13b/20b49499-5df3-450c-a20d-dc421b937e91.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/huggyllama_llama-13b/1762652580.199647", - "retrieved_timestamp": "1762652580.199648", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "huggyllama/llama-13b", - "developer": "meta", - "inference_platform": "unknown", - "id": "huggyllama/llama-13b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24105262924595627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39878925581174585 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34621875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19522938829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/huggyllama_llama-65b/2bff16e4-f0ed-4957-8b20-4ae269642088.json b/leaderboard_data/HFOpenLLMv2/meta/huggyllama_llama-65b/2bff16e4-f0ed-4957-8b20-4ae269642088.json deleted file mode 100644 index 91d9a0558c49ed623387fb6e5b4711f7558abe21..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/huggyllama_llama-65b/2bff16e4-f0ed-4957-8b20-4ae269642088.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/huggyllama_llama-65b/1762652580.1999428", - "retrieved_timestamp": "1762652580.199944", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "huggyllama/llama-65b", 
- "developer": "meta", - "inference_platform": "unknown", - "id": "huggyllama/llama-65b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25259311958935626 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4702556052882764 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35945833333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3077626329787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 65.286 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/huggyllama_llama-7b/61a5624d-ef42-4fdd-a0b1-08fdc2d07615.json b/leaderboard_data/HFOpenLLMv2/meta/huggyllama_llama-7b/61a5624d-ef42-4fdd-a0b1-08fdc2d07615.json deleted file mode 100644 index d2d746a90a58576750a8a5e83f4ce399d18126f1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/huggyllama_llama-7b/61a5624d-ef42-4fdd-a0b1-08fdc2d07615.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/huggyllama_llama-7b/1762652580.200164", - "retrieved_timestamp": "1762652580.200165", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "huggyllama/llama-7b", - "developer": "meta", - "inference_platform": "unknown", - "id": "huggyllama/llama-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25009530268576263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32773134782898566 - } - }, - { - "evaluation_name": "MATH Level 
5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33539583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13131648936170212 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama31_8B_en_emo_v4/198e5d81-0dcd-4dc0-9919-139ce0aa2dd5.json b/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama31_8B_en_emo_v4/198e5d81-0dcd-4dc0-9919-139ce0aa2dd5.json deleted file mode 100644 index 438fb4d9c78f643baa6ef5bf7b581d8b66fcc44f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama31_8B_en_emo_v4/198e5d81-0dcd-4dc0-9919-139ce0aa2dd5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/iFaz_llama31_8B_en_emo_v4/1762652580.202469", - "retrieved_timestamp": "1762652580.202469", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "iFaz/llama31_8B_en_emo_v4", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama31_8B_en_emo_v4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3042504997850149 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49155384618761383 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08836858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3642916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3048537234042553 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "", - "params_billions": 4.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_1B_en_emo_v1/f202b553-56e6-4a27-b2fa-0f98feabe11e.json b/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_1B_en_emo_v1/f202b553-56e6-4a27-b2fa-0f98feabe11e.json deleted file mode 100644 index 3b5f26b322b3a2e6ebd502e2491e29b1dee21120..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_1B_en_emo_v1/f202b553-56e6-4a27-b2fa-0f98feabe11e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/iFaz_llama32_1B_en_emo_v1/1762652580.2027268", - "retrieved_timestamp": "1762652580.2027268", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "iFaz/llama32_1B_en_emo_v1", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama32_1B_en_emo_v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44083808738591385 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33802631394113886 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34888541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17611369680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.765 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_1000_stp/a4111230-4313-4f75-bcd3-c598e436987b.json 
b/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_1000_stp/a4111230-4313-4f75-bcd3-c598e436987b.json deleted file mode 100644 index 6d83931f9e6cebb8db8570e862bc617d4110184a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_1000_stp/a4111230-4313-4f75-bcd3-c598e436987b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_1000_stp/1762652580.202935", - "retrieved_timestamp": "1762652580.2029362", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "iFaz/llama32_3B_en_emo_1000_stp", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama32_3B_en_emo_1000_stp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7295243287809678 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45218477635502685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3620625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3123337765957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.848 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_2000_stp/5468fbdc-63e7-4e9d-8370-2f3f0e83e559.json b/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_2000_stp/5468fbdc-63e7-4e9d-8370-2f3f0e83e559.json deleted file mode 100644 index 553c1dbea814130e3b03ac854ffd0d12fb227699..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_2000_stp/5468fbdc-63e7-4e9d-8370-2f3f0e83e559.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_2000_stp/1762652580.203131", - "retrieved_timestamp": "1762652580.203132", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "iFaz/llama32_3B_en_emo_2000_stp", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama32_3B_en_emo_2000_stp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7368681764385165 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45345889848516396 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35269791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3097573138297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.848 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_300_stp/0806c872-f913-493a-ada4-7db88a93b840.json b/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_300_stp/0806c872-f913-493a-ada4-7db88a93b840.json deleted file mode 100644 index ed11093b3269298ec974d23607a26a7cced82613..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_300_stp/0806c872-f913-493a-ada4-7db88a93b840.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_300_stp/1762652580.203331", - "retrieved_timestamp": "1762652580.203331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "iFaz/llama32_3B_en_emo_300_stp", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama32_3B_en_emo_300_stp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.725552644760347 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45045681689917494 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3620625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3148271276595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.848 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_5000_stp/9ffc9dbb-065b-47ae-a985-541ee7f7126d.json b/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_5000_stp/9ffc9dbb-065b-47ae-a985-541ee7f7126d.json deleted file mode 100644 index fbcb8e46b6be0f58bbb8e46d66c3b13629f2faca..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_5000_stp/9ffc9dbb-065b-47ae-a985-541ee7f7126d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_5000_stp/1762652580.203531", - "retrieved_timestamp": "1762652580.203532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "iFaz/llama32_3B_en_emo_5000_stp", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama32_3B_en_emo_5000_stp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7100404703963262 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4567949942342784 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, 
- "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34460416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30668218085106386 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.848 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_v2/03587c1e-14e3-434f-9582-448914832c95.json b/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_v2/03587c1e-14e3-434f-9582-448914832c95.json deleted file mode 100644 index 8426649d83c700c86eba52ea7fb4d2ca12deb26b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_v2/03587c1e-14e3-434f-9582-448914832c95.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_v2/1762652580.203742", - "retrieved_timestamp": "1762652580.203743", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "iFaz/llama32_3B_en_emo_v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama32_3B_en_emo_v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5454017562290279 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4283518305582969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34822916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3003656914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.848 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_v3/8bb5540b-b19d-4641-9dea-36ea43b07250.json b/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_v3/8bb5540b-b19d-4641-9dea-36ea43b07250.json deleted file mode 100644 index e04168a67289fd861a56292c5d1efa918df36e02..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/iFaz_llama32_3B_en_emo_v3/8bb5540b-b19d-4641-9dea-36ea43b07250.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/iFaz_llama32_3B_en_emo_v3/1762652580.203954", - "retrieved_timestamp": "1762652580.203954", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "iFaz/llama32_3B_en_emo_v3", - "developer": "meta", - "inference_platform": "unknown", - "id": "iFaz/llama32_3B_en_emo_v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5759263199421978 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43013596402782367 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35527083333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27102726063829785 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.848 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/jiangxinyang-shanda_Homer-LLama3-8B/73c50ab1-bdf8-4fbc-b7e6-d4a8e8bb8a4e.json b/leaderboard_data/HFOpenLLMv2/meta/jiangxinyang-shanda_Homer-LLama3-8B/73c50ab1-bdf8-4fbc-b7e6-d4a8e8bb8a4e.json deleted file mode 
100644 index 96c11a286dc08856c6bcf9aea977058b7fa18cd9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/jiangxinyang-shanda_Homer-LLama3-8B/73c50ab1-bdf8-4fbc-b7e6-d4a8e8bb8a4e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/jiangxinyang-shanda_Homer-LLama3-8B/1762652580.2879412", - "retrieved_timestamp": "1762652580.287943", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "jiangxinyang-shanda/Homer-LLama3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "jiangxinyang-shanda/Homer-LLama3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3991719748046295 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173242047543128 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40562499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3139128989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/keeeeenw_MicroLlama/7407c2ed-23f5-4c92-b987-2c3a91147d98.json b/leaderboard_data/HFOpenLLMv2/meta/keeeeenw_MicroLlama/7407c2ed-23f5-4c92-b987-2c3a91147d98.json deleted file mode 100644 index 84d8a4ba2cb773002953b1e3f91e838e6328948e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/keeeeenw_MicroLlama/7407c2ed-23f5-4c92-b987-2c3a91147d98.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/keeeeenw_MicroLlama/1762652580.3060532", - "retrieved_timestamp": "1762652580.3060539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "keeeeenw/MicroLlama", - "developer": "meta", - "inference_platform": "unknown", - "id": "keeeeenw/MicroLlama" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19853765785892544 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3007313991347165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36981249999999993 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11377992021276596 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.305 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/kevin009_llamaRAGdrama/41e4d24f-9790-40f5-a915-ee4155d5cbc6.json b/leaderboard_data/HFOpenLLMv2/meta/kevin009_llamaRAGdrama/41e4d24f-9790-40f5-a915-ee4155d5cbc6.json deleted file mode 100644 index 714c639301774af9e61fb4cd336b1d337dc44004..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/kevin009_llamaRAGdrama/41e4d24f-9790-40f5-a915-ee4155d5cbc6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kevin009_llamaRAGdrama/1762652580.3065941", - "retrieved_timestamp": "1762652580.3065941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kevin009/llamaRAGdrama", - "developer": "meta", - "inference_platform": "unknown", - "id": "kevin009/llamaRAGdrama" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2598372318780835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4007385667099335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43157291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27235704787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-3-8b-stock-merge/211ac2a5-5bd1-4347-8eb8-fa1bd4b1a5ad.json b/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-3-8b-stock-merge/211ac2a5-5bd1-4347-8eb8-fa1bd4b1a5ad.json deleted file mode 100644 index 302726816468e56ef0ca9b5238b46ce93bd765a6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-3-8b-stock-merge/211ac2a5-5bd1-4347-8eb8-fa1bd4b1a5ad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/khoantap_llama-3-8b-stock-merge/1762652580.307331", - "retrieved_timestamp": "1762652580.307332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "khoantap/llama-3-8b-stock-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": "khoantap/llama-3-8b-stock-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48117993590340297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5162255701726589 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16163141993957703 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39458333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37998670212765956 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-breadcrumbs-ties-merge/9eae434a-fb2a-45b9-a592-f39a9c469f07.json b/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-breadcrumbs-ties-merge/9eae434a-fb2a-45b9-a592-f39a9c469f07.json deleted file mode 100644 index 9812ea48dfcdf4752ddfe2287085c221bdc06c15..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-breadcrumbs-ties-merge/9eae434a-fb2a-45b9-a592-f39a9c469f07.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/khoantap_llama-breadcrumbs-ties-merge/1762652580.307606", - "retrieved_timestamp": "1762652580.307607", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "khoantap/llama-breadcrumbs-ties-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": "khoantap/llama-breadcrumbs-ties-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22051933314716063 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5415928172799896 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44344791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3171542553191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-evolve-ties-best-merge/0ab7f323-1be5-4fc7-a5d8-d4f77f802da3.json b/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-evolve-ties-best-merge/0ab7f323-1be5-4fc7-a5d8-d4f77f802da3.json deleted file mode 100644 index 7c647dbdd7d5ee52ab440f60ea5429ef77471599..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-evolve-ties-best-merge/0ab7f323-1be5-4fc7-a5d8-d4f77f802da3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/khoantap_llama-evolve-ties-best-merge/1762652580.307874", - "retrieved_timestamp": "1762652580.3078752", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "khoantap/llama-evolve-ties-best-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": "khoantap/llama-evolve-ties-best-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6743950495795601 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5413565104914732 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39455208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859707446808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-linear-0.5-0.5-1-merge/0906fee9-0edd-494f-bf01-a34711f17596.json b/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-linear-0.5-0.5-1-merge/0906fee9-0edd-494f-bf01-a34711f17596.json deleted file mode 100644 index 789d635cb7d03f049105e9202d57a3b5a7350ae8..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-linear-0.5-0.5-1-merge/0906fee9-0edd-494f-bf01-a34711f17596.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/khoantap_llama-linear-0.5-0.5-1-merge/1762652580.3081899", - "retrieved_timestamp": "1762652580.308191", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "khoantap/llama-linear-0.5-0.5-1-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": "khoantap/llama-linear-0.5-0.5-1-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48122980358781364 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5643013649244941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41427083333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38331117021276595 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-linear-0.5-1-0.5-merge/88d174f6-6d30-4859-bbf0-6f5446ce1b9d.json b/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-linear-0.5-1-0.5-merge/88d174f6-6d30-4859-bbf0-6f5446ce1b9d.json deleted file mode 100644 index 0f55ebbaaa0105eec8966e77f60d73b048cae94e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-linear-0.5-1-0.5-merge/88d174f6-6d30-4859-bbf0-6f5446ce1b9d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/khoantap_llama-linear-0.5-1-0.5-merge/1762652580.308497", - "retrieved_timestamp": "1762652580.308498", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "khoantap/llama-linear-0.5-1-0.5-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": "khoantap/llama-linear-0.5-1-0.5-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5031616111916382 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5950766502131658 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4171875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3690159574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-linear-1-0.5-0.5-merge/49e5e4e4-6905-4b9e-9f53-b7ac598b5102.json b/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-linear-1-0.5-0.5-merge/49e5e4e4-6905-4b9e-9f53-b7ac598b5102.json deleted file mode 100644 index 713403a7362372dc30f19e1b7061f3168e0ea606..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-linear-1-0.5-0.5-merge/49e5e4e4-6905-4b9e-9f53-b7ac598b5102.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/khoantap_llama-linear-1-0.5-0.5-merge/1762652580.308746", - "retrieved_timestamp": "1762652580.308747", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "khoantap/llama-linear-1-0.5-0.5-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": "khoantap/llama-linear-1-0.5-0.5-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45145436331156885 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5526017944110775 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24773413897280966 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4117604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.363530585106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-slerp-merge/e30c2825-6d36-454c-8787-e5cbdfcbcfdf.json b/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-slerp-merge/e30c2825-6d36-454c-8787-e5cbdfcbcfdf.json deleted file mode 100644 index 186939cad51c9e180be492f1f45759969cf99229..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/khoantap_llama-slerp-merge/e30c2825-6d36-454c-8787-e5cbdfcbcfdf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/khoantap_llama-slerp-merge/1762652580.308971", - "retrieved_timestamp": "1762652580.3089721", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "khoantap/llama-slerp-merge", - "developer": "meta", - "inference_platform": "unknown", - "id": "khoantap/llama-slerp-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49799088640363126 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5782782780315171 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40531249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3677692819148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/khulaifi95_Llama-3.1-8B-Reason-Blend-888k/85a2710f-feaf-4dc2-aafa-04c33abf6425.json b/leaderboard_data/HFOpenLLMv2/meta/khulaifi95_Llama-3.1-8B-Reason-Blend-888k/85a2710f-feaf-4dc2-aafa-04c33abf6425.json deleted file mode 100644 index 0870e79df318dd9b5a7fe0734153970204eea00b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/khulaifi95_Llama-3.1-8B-Reason-Blend-888k/85a2710f-feaf-4dc2-aafa-04c33abf6425.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/khulaifi95_Llama-3.1-8B-Reason-Blend-888k/1762652580.309421", - "retrieved_timestamp": "1762652580.309421", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "khulaifi95/Llama-3.1-8B-Reason-Blend-888k", - "developer": "meta", - "inference_platform": "unknown", - "id": "khulaifi95/Llama-3.1-8B-Reason-Blend-888k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.583170432230925 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4789526925494476 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3379375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3100066489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/laislemke_LLaMA-2-vicuna-7b-slerp/66d98c7d-7fd1-41bc-9229-855f9d02412d.json b/leaderboard_data/HFOpenLLMv2/meta/laislemke_LLaMA-2-vicuna-7b-slerp/66d98c7d-7fd1-41bc-9229-855f9d02412d.json deleted file mode 100644 index da58c0d5ee1ab7fc7d050801612817510f3010de..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/laislemke_LLaMA-2-vicuna-7b-slerp/66d98c7d-7fd1-41bc-9229-855f9d02412d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/laislemke_LLaMA-2-vicuna-7b-slerp/1762652580.311907", - "retrieved_timestamp": "1762652580.311908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "laislemke/LLaMA-2-vicuna-7b-slerp", - "developer": "meta", - "inference_platform": "unknown", - "id": "laislemke/LLaMA-2-vicuna-7b-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29320979445648654 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29862163052356266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3833020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13422539893617022 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/lemon07r_Llama-3-RedMagic4-8B/22ae03c6-dd4f-4263-a005-624dae701da3.json b/leaderboard_data/HFOpenLLMv2/meta/lemon07r_Llama-3-RedMagic4-8B/22ae03c6-dd4f-4263-a005-624dae701da3.json deleted file mode 100644 index e14a86a09d79a8e4d9cf17654b7f0be879306168..0000000000000000000000000000000000000000 
--- a/leaderboard_data/HFOpenLLMv2/meta/lemon07r_Llama-3-RedMagic4-8B/22ae03c6-dd4f-4263-a005-624dae701da3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_Llama-3-RedMagic4-8B/1762652580.318728", - "retrieved_timestamp": "1762652580.318729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/Llama-3-RedMagic4-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "lemon07r/Llama-3-RedMagic4-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4864005283758206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42560489470390417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3766354166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3676030585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/lemon07r_llama-3-NeuralMahou-8b/13b8357d-225e-4ba0-bf34-45479a562532.json b/leaderboard_data/HFOpenLLMv2/meta/lemon07r_llama-3-NeuralMahou-8b/13b8357d-225e-4ba0-bf34-45479a562532.json deleted file mode 100644 index 5a576bef21551b0069c89aeff4fbf2c65a92aa75..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/lemon07r_llama-3-NeuralMahou-8b/13b8357d-225e-4ba0-bf34-45479a562532.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lemon07r_llama-3-NeuralMahou-8b/1762652580.319005", - "retrieved_timestamp": "1762652580.319006", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lemon07r/llama-3-NeuralMahou-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "lemon07r/llama-3-NeuralMahou-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49009738604680025 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41841123683301523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3690159574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-full/37aa2a50-974f-4cb0-81e3-f160f08c8a0e.json b/leaderboard_data/HFOpenLLMv2/meta/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-full/37aa2a50-974f-4cb0-81e3-f160f08c8a0e.json deleted file mode 100644 index 261385d6aa5cad4b801cfa8cc718ba86e1d25c80..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-full/37aa2a50-974f-4cb0-81e3-f160f08c8a0e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-full/1762652580.32158", - "retrieved_timestamp": "1762652580.32158", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full", - "developer": "meta", - "inference_platform": "unknown", - "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.5817464327983085 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4714219934773132 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3221875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33095079787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-half/90ab1587-99b9-48e1-b3f3-8aaf07313eaa.json b/leaderboard_data/HFOpenLLMv2/meta/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-half/90ab1587-99b9-48e1-b3f3-8aaf07313eaa.json deleted file mode 100644 index 10ceb771f2891a3bd0a8e743a7fe2e329f64f1e0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-half/90ab1587-99b9-48e1-b3f3-8aaf07313eaa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-half/1762652580.3218", - "retrieved_timestamp": "1762652580.321801", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half", - "developer": "meta", - "inference_platform": "unknown", - "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6249107922534431 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47074584910573014 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35158333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36136968085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-top25/ebfb14c0-d725-4650-9d04-ed4f7ebaf676.json b/leaderboard_data/HFOpenLLMv2/meta/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-top25/ebfb14c0-d725-4650-9d04-ed4f7ebaf676.json deleted file mode 100644 index 66d654fbb95bfdfc0a480630f957a8d7a3f27c92..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-top25/ebfb14c0-d725-4650-9d04-ed4f7ebaf676.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-top25/1762652580.322012", - "retrieved_timestamp": "1762652580.322013", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25", - "developer": "meta", - "inference_platform": "unknown", - "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6636535503574958 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4864641205580417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35660416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684341755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-top75/fcb13fe4-e314-4cdd-ae6e-82531ad6a829.json b/leaderboard_data/HFOpenLLMv2/meta/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-top75/fcb13fe4-e314-4cdd-ae6e-82531ad6a829.json deleted file mode 100644 index c2a0a0af3c72768f61d3e91e5931adf4f375023a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-top75/fcb13fe4-e314-4cdd-ae6e-82531ad6a829.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual-orpo-borda-top75/1762652580.322237", - "retrieved_timestamp": "1762652580.322238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75", - "developer": "meta", - "inference_platform": "unknown", - "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6687245397766814 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48333166095856117 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3816875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37691156914893614 
- } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/lightblue_suzume-llama-3-8B-multilingual/8eaee9b3-78b0-4523-9151-695c27c5cfa7.json b/leaderboard_data/HFOpenLLMv2/meta/lightblue_suzume-llama-3-8B-multilingual/8eaee9b3-78b0-4523-9151-695c27c5cfa7.json deleted file mode 100644 index b2d6ee97d7e615125ca26d03334062cb5c6f9969..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/lightblue_suzume-llama-3-8B-multilingual/8eaee9b3-78b0-4523-9151-695c27c5cfa7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/lightblue_suzume-llama-3-8B-multilingual/1762652580.321283", - "retrieved_timestamp": "1762652580.321284", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "lightblue/suzume-llama-3-8B-multilingual", - "developer": "meta", - "inference_platform": "unknown", - "id": "lightblue/suzume-llama-3-8B-multilingual" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6678003253589365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49499524187359745 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39768749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33834773936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/m42-health_Llama3-Med42-70B/36ebe051-2bac-46cb-b990-33025df0ccac.json b/leaderboard_data/HFOpenLLMv2/meta/m42-health_Llama3-Med42-70B/36ebe051-2bac-46cb-b990-33025df0ccac.json deleted file mode 100644 index 8060f56e3bc636ee802701b6e57f1f25de7d98b1..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/m42-health_Llama3-Med42-70B/36ebe051-2bac-46cb-b990-33025df0ccac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/m42-health_Llama3-Med42-70B/1762652580.328667", - "retrieved_timestamp": "1762652580.328667", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "m42-health/Llama3-Med42-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "m42-health/Llama3-Med42-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6291074349392944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6687891109485058 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2258308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46289583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4962599734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/maldv_badger-kappa-llama-3-8b/32e1b138-c236-48e3-8152-d3715127d309.json b/leaderboard_data/HFOpenLLMv2/meta/maldv_badger-kappa-llama-3-8b/32e1b138-c236-48e3-8152-d3715127d309.json deleted file mode 100644 index dbe17017b0ad3255b2e79cb6ecb6bd11881ff18d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/maldv_badger-kappa-llama-3-8b/32e1b138-c236-48e3-8152-d3715127d309.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/maldv_badger-kappa-llama-3-8b/1762652580.331178", - "retrieved_timestamp": "1762652580.331179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "maldv/badger-kappa-llama-3-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "maldv/badger-kappa-llama-3-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46946435457918323 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084927997756815 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3765104166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3695146276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/maldv_badger-lambda-llama-3-8b/18ae9d71-15e0-4d11-86c0-9cac4dbaa3f3.json b/leaderboard_data/HFOpenLLMv2/meta/maldv_badger-lambda-llama-3-8b/18ae9d71-15e0-4d11-86c0-9cac4dbaa3f3.json deleted file mode 100644 index 19ffcd8210e9a59209e36ce4340ad180e222d466..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/maldv_badger-lambda-llama-3-8b/18ae9d71-15e0-4d11-86c0-9cac4dbaa3f3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/maldv_badger-lambda-llama-3-8b/1762652580.331519", - "retrieved_timestamp": "1762652580.33152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "maldv/badger-lambda-llama-3-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "maldv/badger-lambda-llama-3-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4860758343417687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49634866510444836 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37666223404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/maldv_badger-mu-llama-3-8b/d43699f9-e6e5-428b-ab52-9d7114443608.json b/leaderboard_data/HFOpenLLMv2/meta/maldv_badger-mu-llama-3-8b/d43699f9-e6e5-428b-ab52-9d7114443608.json deleted file mode 100644 index d7df327f2e4283d14ade292d888d0216e10fdd90..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/maldv_badger-mu-llama-3-8b/d43699f9-e6e5-428b-ab52-9d7114443608.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/maldv_badger-mu-llama-3-8b/1762652580.3317509", - "retrieved_timestamp": "1762652580.3317518", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "maldv/badger-mu-llama-3-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "maldv/badger-mu-llama-3-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49194581488229006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.514287576852281 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35545833333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3673537234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/maldv_badger-writer-llama-3-8b/7c88458f-e9a0-4e90-b5ed-dbdb6fd49b9d.json b/leaderboard_data/HFOpenLLMv2/meta/maldv_badger-writer-llama-3-8b/7c88458f-e9a0-4e90-b5ed-dbdb6fd49b9d.json deleted file mode 100644 index 13b8536b0420ddefba0b6c5e70dc732a9dc7382e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/maldv_badger-writer-llama-3-8b/7c88458f-e9a0-4e90-b5ed-dbdb6fd49b9d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/maldv_badger-writer-llama-3-8b/1762652580.332005", - "retrieved_timestamp": "1762652580.332005", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "maldv/badger-writer-llama-3-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "maldv/badger-writer-llama-3-8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5303140112678804 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4863893856673737 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35809375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3759973404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/mattshumer_Reflection-Llama-3.1-70B/155f55e9-34e3-4753-a783-31df44e791e0.json b/leaderboard_data/HFOpenLLMv2/meta/mattshumer_Reflection-Llama-3.1-70B/155f55e9-34e3-4753-a783-31df44e791e0.json deleted file mode 100644 index 88778d7e94d878ae11d7ab4b478b0c149bf1203d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/mattshumer_Reflection-Llama-3.1-70B/155f55e9-34e3-4753-a783-31df44e791e0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mattshumer_Reflection-Llama-3.1-70B/1762652580.341989", - "retrieved_timestamp": "1762652580.341989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mattshumer/Reflection-Llama-3.1-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "mattshumer/Reflection-Llama-3.1-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00452133671990319 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.645001286484342 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45765625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4955119680851064 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/meditsolutions_Llama-3.1-MedIT-SUN-8B/94d286c8-8356-4bdd-ac91-2ce517b6b974.json b/leaderboard_data/HFOpenLLMv2/meta/meditsolutions_Llama-3.1-MedIT-SUN-8B/94d286c8-8356-4bdd-ac91-2ce517b6b974.json deleted file mode 100644 index 85c7bee4dc287ef4ce248df10c7278e53637447f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/meditsolutions_Llama-3.1-MedIT-SUN-8B/94d286c8-8356-4bdd-ac91-2ce517b6b974.json +++ 
/dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.1-MedIT-SUN-8B/1762652580.342782", - "retrieved_timestamp": "1762652580.342783", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meditsolutions/Llama-3.1-MedIT-SUN-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "meditsolutions/Llama-3.1-MedIT-SUN-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7837293935646308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5186924904597405 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20921450151057402 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40562499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3916223404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/meditsolutions_Llama-3.2-SUN-2.4B-checkpoint-26000/85ccad14-a4eb-41c8-b1b7-f2d0215c358a.json b/leaderboard_data/HFOpenLLMv2/meta/meditsolutions_Llama-3.2-SUN-2.4B-checkpoint-26000/85ccad14-a4eb-41c8-b1b7-f2d0215c358a.json deleted file mode 100644 index 787458c03e2f6fe973baa3fe7f0b45739b5dc4f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/meditsolutions_Llama-3.2-SUN-2.4B-checkpoint-26000/85ccad14-a4eb-41c8-b1b7-f2d0215c358a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.4B-checkpoint-26000/1762652580.3434849", - "retrieved_timestamp": "1762652580.343486", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000", - "developer": "meta", - "inference_platform": "unknown", - "id": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28139447776344545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3017752699243885 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41033333333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1344747340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.209 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/meditsolutions_Llama-3.2-SUN-2.4B-checkpoint-34800/23dca426-d0d9-43d0-86ff-50e01cc292d0.json b/leaderboard_data/HFOpenLLMv2/meta/meditsolutions_Llama-3.2-SUN-2.4B-checkpoint-34800/23dca426-d0d9-43d0-86ff-50e01cc292d0.json deleted file mode 100644 index d689725d30b675919d72894c85d6b6ee2bdfb58f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/meditsolutions_Llama-3.2-SUN-2.4B-checkpoint-34800/23dca426-d0d9-43d0-86ff-50e01cc292d0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.4B-checkpoint-34800/1762652580.343692", - "retrieved_timestamp": "1762652580.343693", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800", - "developer": "meta", - "inference_platform": "unknown", - "id": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.25009530268576263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3161124673749052 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4022395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13572140957446807 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.209 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/meditsolutions_Llama-3.2-SUN-2.4B-v1.0.0/bba22496-6f3a-4ddb-8a69-5995e72aa15f.json b/leaderboard_data/HFOpenLLMv2/meta/meditsolutions_Llama-3.2-SUN-2.4B-v1.0.0/bba22496-6f3a-4ddb-8a69-5995e72aa15f.json deleted file mode 100644 index 7139fb72d44480b26a2061ed09a01089c64643f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/meditsolutions_Llama-3.2-SUN-2.4B-v1.0.0/bba22496-6f3a-4ddb-8a69-5995e72aa15f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meditsolutions_Llama-3.2-SUN-2.4B-v1.0.0/1762652580.343897", - "retrieved_timestamp": "1762652580.343898", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5636865738462834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3390826682107771 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32094791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15425531914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.472 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-2-13b-hf/7a0c1d3a-26f5-44d0-8ca1-8ce6db39cb99.json b/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-2-13b-hf/7a0c1d3a-26f5-44d0-8ca1-8ce6db39cb99.json deleted file mode 100644 index cc55bd0993351be4af39e5ee6fef582e4b0faec7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-2-13b-hf/7a0c1d3a-26f5-44d0-8ca1-8ce6db39cb99.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-13b-hf/1762652580.3493812", - "retrieved_timestamp": "1762652580.349382", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Llama-2-13b-hf", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Llama-2-13b-hf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24824687385027283 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41256242233835055 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23778257978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-2-70b-hf/70acb3cd-fea6-481a-8bf4-fa72e953c110.json b/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-2-70b-hf/70acb3cd-fea6-481a-8bf4-fa72e953c110.json deleted file mode 100644 index 01d7666c25c968a54260967d9a35c4e81e956692..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-2-70b-hf/70acb3cd-fea6-481a-8bf4-fa72e953c110.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-70b-hf/1762652580.3500109", - "retrieved_timestamp": "1762652580.3500118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Llama-2-70b-hf", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Llama-2-70b-hf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2406780675274937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5472591190449342 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41235416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37175864361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 68.977 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-2-7b-hf/36fbd2e7-97fa-4ba4-aad2-47bfc225771d.json b/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-2-7b-hf/36fbd2e7-97fa-4ba4-aad2-47bfc225771d.json deleted file mode 100644 index 
fc3674ed41ed6b84f3419eb4ed95cb250de1726d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-2-7b-hf/36fbd2e7-97fa-4ba4-aad2-47bfc225771d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-2-7b-hf/1762652580.350465", - "retrieved_timestamp": "1762652580.350466", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Llama-2-7b-hf", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Llama-2-7b-hf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2518938638368418 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34961958199821835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37006249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18608710106382978 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-3.1-70B/88d33049-cd88-4b4a-94ba-d0c35a635cfc.json b/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-3.1-70B/88d33049-cd88-4b4a-94ba-d0c35a635cfc.json deleted file mode 100644 index dcb3c82f4f858822ed2f338d76a31701577a69b1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-3.1-70B/88d33049-cd88-4b4a-94ba-d0c35a635cfc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-70B/1762652580.350682", - "retrieved_timestamp": "1762652580.350682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Llama-3.1-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.1-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16843752354862876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.626006918317161 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18429003021148038 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4571875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4654255319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-3.1-8B/58e87619-6244-45b9-8a1f-b2f8f0d0cd31.json b/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-3.1-8B/58e87619-6244-45b9-8a1f-b2f8f0d0cd31.json deleted file mode 100644 index baa573bdfb87745fc772703781752b2c0efff343..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-3.1-8B/58e87619-6244-45b9-8a1f-b2f8f0d0cd31.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.1-8B/1762652580.351093", - "retrieved_timestamp": "1762652580.351093", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Llama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12459828809780273 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46595905446007296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3811875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32878989361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-3.2-1B/b4b6a8d2-be7f-4b8f-b280-3e62015a61d3.json b/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-3.2-1B/b4b6a8d2-be7f-4b8f-b280-3e62015a61d3.json deleted file mode 100644 index 93883390cf632a6464fd1607584aca3a994294d3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-3.2-1B/b4b6a8d2-be7f-4b8f-b280-3e62015a61d3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-1B/1762652580.3515048", - "retrieved_timestamp": "1762652580.351506", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Llama-3.2-1B", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.2-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14777900415342402 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31149540964608097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22818791946308725 - } - 
}, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12034574468085106 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.24 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-3.2-3B/19aba348-6bdd-425a-bd7b-505aa2658f6c.json b/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-3.2-3B/19aba348-6bdd-425a-bd7b-505aa2658f6c.json deleted file mode 100644 index e8175dd9ab34a9c56acc70b830dd3a2638300c30..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Llama-3.2-3B/19aba348-6bdd-425a-bd7b-505aa2658f6c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Llama-3.2-3B/1762652580.351924", - "retrieved_timestamp": "1762652580.351925", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Llama-3.2-3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Llama-3.2-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13374069690643048 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3905117116991059 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35771875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2487533244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} 
\ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Meta-Llama-3-70B/dddadaa0-6808-4b34-a6e2-29663460c3e0.json b/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Meta-Llama-3-70B/dddadaa0-6808-4b34-a6e2-29663460c3e0.json deleted file mode 100644 index fe2fa525f460fef087f7752bb0a9310e42136662..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Meta-Llama-3-70B/dddadaa0-6808-4b34-a6e2-29663460c3e0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-70B/1762652580.352541", - "retrieved_timestamp": "1762652580.352541", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Meta-Llama-3-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Meta-Llama-3-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1603190645265673 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6461074599904467 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3976510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4518229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4709109042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Meta-Llama-3-8B/75f6ae05-a987-455d-8167-fc345d55c370.json b/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Meta-Llama-3-8B/75f6ae05-a987-455d-8167-fc345d55c370.json deleted file mode 100644 index e0720a7a27ef8fa0ebd95a7d12c2817022479eeb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/meta-llama_Meta-Llama-3-8B/75f6ae05-a987-455d-8167-fc345d55c370.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meta-llama_Meta-Llama-3-8B/1762652580.352957", - 
"retrieved_timestamp": "1762652580.352957", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta-llama/Meta-Llama-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "meta-llama/Meta-Llama-3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14550614591506092 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4597905195240255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36140625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32097739361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/migtissera_Llama-3-70B-Synthia-v3.5/7ba5e7cb-3050-4838-8762-4b31a5c9d912.json b/leaderboard_data/HFOpenLLMv2/meta/migtissera_Llama-3-70B-Synthia-v3.5/7ba5e7cb-3050-4838-8762-4b31a5c9d912.json deleted file mode 100644 index e5560dd779f0a8b4ee5becf8235365a55f0c61c7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/migtissera_Llama-3-70B-Synthia-v3.5/7ba5e7cb-3050-4838-8762-4b31a5c9d912.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/migtissera_Llama-3-70B-Synthia-v3.5/1762652580.358073", - "retrieved_timestamp": "1762652580.3580742", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "migtissera/Llama-3-70B-Synthia-v3.5", - "developer": "meta", - "inference_platform": "unknown", - "id": "migtissera/Llama-3-70B-Synthia-v3.5" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6076499244227538 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6488638026271278 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49219791666666673 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4658410904255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/migtissera_Llama-3-8B-Synthia-v3.5/3c843cd0-ce71-4feb-9452-65fc7534518e.json b/leaderboard_data/HFOpenLLMv2/meta/migtissera_Llama-3-8B-Synthia-v3.5/3c843cd0-ce71-4feb-9452-65fc7534518e.json deleted file mode 100644 index f1fcd196842354a0b2a381aa24655db9ccce00cd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/migtissera_Llama-3-8B-Synthia-v3.5/3c843cd0-ce71-4feb-9452-65fc7534518e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/migtissera_Llama-3-8B-Synthia-v3.5/1762652580.358322", - "retrieved_timestamp": "1762652580.358322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "migtissera/Llama-3-8B-Synthia-v3.5", - "developer": "meta", - "inference_platform": "unknown", - "id": "migtissera/Llama-3-8B-Synthia-v3.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5069582042314393 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4887940933660044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40438541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30302526595744683 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/mindw96_DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/ce85152e-fdde-406a-9818-0eb945ff1d6a.json b/leaderboard_data/HFOpenLLMv2/meta/mindw96_DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/ce85152e-fdde-406a-9818-0eb945ff1d6a.json deleted file mode 100644 index b7df556a91a6ad83a941c0cd186ab95456290f0c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/mindw96_DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/ce85152e-fdde-406a-9818-0eb945ff1d6a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mindw96_DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3/1762652580.360158", - "retrieved_timestamp": "1762652580.360159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3", - "developer": "meta", - "inference_platform": "unknown", - "id": "mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13881168632561602 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3067536965504715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3792083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11062167553191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/mkurman_llama-3.2-MEDIT-3B-o1/43a51d6d-e038-4476-a63b-2f4260d736d4.json b/leaderboard_data/HFOpenLLMv2/meta/mkurman_llama-3.2-MEDIT-3B-o1/43a51d6d-e038-4476-a63b-2f4260d736d4.json deleted file mode 100644 index 54943f8dfaf4e47ced6fb0691f62ffb475be0df5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/mkurman_llama-3.2-MEDIT-3B-o1/43a51d6d-e038-4476-a63b-2f4260d736d4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mkurman_llama-3.2-MEDIT-3B-o1/1762652580.365804", - "retrieved_timestamp": "1762652580.3658051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mkurman/llama-3.2-MEDIT-3B-o1", - "developer": "meta", - "inference_platform": "unknown", - "id": "mkurman/llama-3.2-MEDIT-3B-o1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43816517950150047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43996584807961553 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27410239361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 
3.607 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/mkxu_llama-3-8b-po1/e26ea6fd-723d-45de-b0f1-5bcbae1eb992.json b/leaderboard_data/HFOpenLLMv2/meta/mkxu_llama-3-8b-po1/e26ea6fd-723d-45de-b0f1-5bcbae1eb992.json deleted file mode 100644 index 0d376bb515032155731133b8ee44e8347cd849ef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/mkxu_llama-3-8b-po1/e26ea6fd-723d-45de-b0f1-5bcbae1eb992.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mkxu_llama-3-8b-po1/1762652580.3669372", - "retrieved_timestamp": "1762652580.366938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mkxu/llama-3-8b-po1", - "developer": "meta", - "inference_platform": "unknown", - "id": "mkxu/llama-3-8b-po1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4081149128756145 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49760854852246356 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3804166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3562167553191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/mlabonne_ChimeraLlama-3-8B-v2/fd31a5f1-986e-4040-b04b-3018161e6e66.json b/leaderboard_data/HFOpenLLMv2/meta/mlabonne_ChimeraLlama-3-8B-v2/fd31a5f1-986e-4040-b04b-3018161e6e66.json deleted file mode 100644 index 06379d4565a780626bbe3b341a7fe57ebd4b0e29..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/mlabonne_ChimeraLlama-3-8B-v2/fd31a5f1-986e-4040-b04b-3018161e6e66.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlabonne_ChimeraLlama-3-8B-v2/1762652580.3680582", - "retrieved_timestamp": 
"1762652580.3680582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlabonne/ChimeraLlama-3-8B-v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "mlabonne/ChimeraLlama-3-8B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44688315890725494 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045597361952603 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3790833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3568816489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/mlabonne_ChimeraLlama-3-8B-v3/eef221de-8dc3-410a-943d-900c810948ae.json b/leaderboard_data/HFOpenLLMv2/meta/mlabonne_ChimeraLlama-3-8B-v3/eef221de-8dc3-410a-943d-900c810948ae.json deleted file mode 100644 index 031237108573064f96c4084ebed0da3e0b1aa718..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/mlabonne_ChimeraLlama-3-8B-v3/eef221de-8dc3-410a-943d-900c810948ae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlabonne_ChimeraLlama-3-8B-v3/1762652580.3683012", - "retrieved_timestamp": "1762652580.3683012", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlabonne/ChimeraLlama-3-8B-v3", - "developer": "meta", - "inference_platform": "unknown", - "id": "mlabonne/ChimeraLlama-3-8B-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44078821970150317 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49781902726529204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08836858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4003541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36685505319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/mlabonne_Hermes-3-Llama-3.1-70B-lorablated/07190707-16fb-47fc-9813-4f2408a04bdb.json b/leaderboard_data/HFOpenLLMv2/meta/mlabonne_Hermes-3-Llama-3.1-70B-lorablated/07190707-16fb-47fc-9813-4f2408a04bdb.json deleted file mode 100644 index 54ce54271bb6f86fa49e6cc70ffce506435fde1f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/mlabonne_Hermes-3-Llama-3.1-70B-lorablated/07190707-16fb-47fc-9813-4f2408a04bdb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlabonne_Hermes-3-Llama-3.1-70B-lorablated/1762652580.368906", - "retrieved_timestamp": "1762652580.368906", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlabonne/Hermes-3-Llama-3.1-70B-lorablated", - "developer": "meta", - "inference_platform": "unknown", - "id": "mlabonne/Hermes-3-Llama-3.1-70B-lorablated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34244360518978534 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6693171063183693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36577181208053694 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5029270833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4679188829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/mlabonne_OrpoLlama-3-8B/b8b5b30e-d259-49ae-8155-7f63ddae88c8.json b/leaderboard_data/HFOpenLLMv2/meta/mlabonne_OrpoLlama-3-8B/b8b5b30e-d259-49ae-8155-7f63ddae88c8.json deleted file mode 100644 index fa4bed186c2eaf244da626ce199ed224b52a797f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/mlabonne_OrpoLlama-3-8B/b8b5b30e-d259-49ae-8155-7f63ddae88c8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlabonne_OrpoLlama-3-8B/1762652580.369958", - "retrieved_timestamp": "1762652580.3699589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlabonne/OrpoLlama-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "mlabonne/OrpoLlama-3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36527524745453177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4424079063503051 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3579375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2705285904255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/mmnga_Llama-3-70B-japanese-suzume-vector-v0.1/56f52103-ea5e-4228-ac7b-3c6929fe5b76.json b/leaderboard_data/HFOpenLLMv2/meta/mmnga_Llama-3-70B-japanese-suzume-vector-v0.1/56f52103-ea5e-4228-ac7b-3c6929fe5b76.json deleted file mode 100644 index 7820dd9745778feaaccb29f7977efbda1d9c3ad2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/mmnga_Llama-3-70B-japanese-suzume-vector-v0.1/56f52103-ea5e-4228-ac7b-3c6929fe5b76.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mmnga_Llama-3-70B-japanese-suzume-vector-v0.1/1762652580.370961", - "retrieved_timestamp": "1762652580.370962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mmnga/Llama-3-70B-japanese-suzume-vector-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "mmnga/Llama-3-70B-japanese-suzume-vector-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4648931501748693 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6541763652331517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4140625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5224401595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/meta/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Llama3-8B-v1.1/09ec0c0c-d403-4f23-99a4-61196c70734d.json b/leaderboard_data/HFOpenLLMv2/meta/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Llama3-8B-v1.1/09ec0c0c-d403-4f23-99a4-61196c70734d.json deleted file mode 100644 index 35a5a637429365a00610db42393b3ed21f8a7528..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Llama3-8B-v1.1/09ec0c0c-d403-4f23-99a4-61196c70734d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Llama3-8B-v1.1/1762652580.371218", - "retrieved_timestamp": "1762652580.371218", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.370396104558128 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34730320150504124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3285498489425982 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33955208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2198304521276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/mukaj_Llama-3.1-Hawkish-8B/b94f468b-7c0e-491e-8404-de1bad7ff0f0.json b/leaderboard_data/HFOpenLLMv2/meta/mukaj_Llama-3.1-Hawkish-8B/b94f468b-7c0e-491e-8404-de1bad7ff0f0.json deleted file mode 100644 index 38a1fd16d811dc09fd2a404a6652b541d3f766d5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/mukaj_Llama-3.1-Hawkish-8B/b94f468b-7c0e-491e-8404-de1bad7ff0f0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/mukaj_Llama-3.1-Hawkish-8B/1762652580.3748438", - "retrieved_timestamp": "1762652580.374845", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mukaj/Llama-3.1-Hawkish-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "mukaj/Llama-3.1-Hawkish-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6720468357291984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4883822828416351 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39672916666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33311170212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/nbeerbower_Llama-3.1-Nemotron-lorablated-70B/a9af8b88-8f00-4662-8ca4-d042030885ae.json b/leaderboard_data/HFOpenLLMv2/meta/nbeerbower_Llama-3.1-Nemotron-lorablated-70B/a9af8b88-8f00-4662-8ca4-d042030885ae.json deleted file mode 100644 index f5119fa885a19fadf463a875471f8714435429b5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/nbeerbower_Llama-3.1-Nemotron-lorablated-70B/a9af8b88-8f00-4662-8ca4-d042030885ae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Llama-3.1-Nemotron-lorablated-70B/1762652580.379643", - "retrieved_timestamp": "1762652580.379644", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Llama-3.1-Nemotron-lorablated-70B", - 
"developer": "meta", - "inference_platform": "unknown", - "id": "nbeerbower/Llama-3.1-Nemotron-lorablated-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7228797368759337 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6825051293384551 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4681666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5343251329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/nbeerbower_Llama3.1-Gutenberg-Doppel-70B/fffd0da2-d4b0-4a11-9fd4-c0dfa0c70431.json b/leaderboard_data/HFOpenLLMv2/meta/nbeerbower_Llama3.1-Gutenberg-Doppel-70B/fffd0da2-d4b0-4a11-9fd4-c0dfa0c70431.json deleted file mode 100644 index 705f4dbbc01882d1e287615b6333e92681bc5ce0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/nbeerbower_Llama3.1-Gutenberg-Doppel-70B/fffd0da2-d4b0-4a11-9fd4-c0dfa0c70431.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Llama3.1-Gutenberg-Doppel-70B/1762652580.379898", - "retrieved_timestamp": "1762652580.3798988", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Llama3.1-Gutenberg-Doppel-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "nbeerbower/Llama3.1-Gutenberg-Doppel-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7092159913474027 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6660891255994471 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2122356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48971875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4736535904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/nbeerbower_llama-3-gutenberg-8B/144ff584-3230-42e5-acae-35518b10a1e9.json b/leaderboard_data/HFOpenLLMv2/meta/nbeerbower_llama-3-gutenberg-8B/144ff584-3230-42e5-acae-35518b10a1e9.json deleted file mode 100644 index e5f1552ae86be59ac0da50f7322b35febf4b3735..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/nbeerbower_llama-3-gutenberg-8B/144ff584-3230-42e5-acae-35518b10a1e9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_llama-3-gutenberg-8B/1762652580.3850691", - "retrieved_timestamp": "1762652580.385074", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/llama-3-gutenberg-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "nbeerbower/llama-3-gutenberg-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371910973993448 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49936002561994197 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40730208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.383061835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/nbeerbower_llama3.1-cc-8B/e011ff58-ea5c-4857-a76d-503c4188886f.json b/leaderboard_data/HFOpenLLMv2/meta/nbeerbower_llama3.1-cc-8B/e011ff58-ea5c-4857-a76d-503c4188886f.json deleted file mode 100644 index db13ce9d07d4b2d4d40154de0131f207272fadc6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/nbeerbower_llama3.1-cc-8B/e011ff58-ea5c-4857-a76d-503c4188886f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_llama3.1-cc-8B/1762652580.385431", - "retrieved_timestamp": "1762652580.385432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/llama3.1-cc-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "nbeerbower/llama3.1-cc-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5068086011782071 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4871187428614386 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38851041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3346908244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/nbeerbower_llama3.1-kartoffeldes-70B/c17cced5-be98-49c5-a919-c15b641ba2e7.json b/leaderboard_data/HFOpenLLMv2/meta/nbeerbower_llama3.1-kartoffeldes-70B/c17cced5-be98-49c5-a919-c15b641ba2e7.json deleted file mode 100644 index 412b891fd293b595b83e3acec0d9da303a0a75a3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/nbeerbower_llama3.1-kartoffeldes-70B/c17cced5-be98-49c5-a919-c15b641ba2e7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_llama3.1-kartoffeldes-70B/1762652580.385698", - "retrieved_timestamp": "1762652580.385699", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/llama3.1-kartoffeldes-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "nbeerbower/llama3.1-kartoffeldes-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8230218043679659 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6893878613110068 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3217522658610272 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46460416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4988364361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/necva_IE-cont-Llama3.1-8B/43f5a551-7257-4595-9b0c-60799ade231b.json b/leaderboard_data/HFOpenLLMv2/meta/necva_IE-cont-Llama3.1-8B/43f5a551-7257-4595-9b0c-60799ade231b.json deleted file mode 100644 index 6dc445014f90dbfb53c9f6d5dfca81635baa110d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/necva_IE-cont-Llama3.1-8B/43f5a551-7257-4595-9b0c-60799ade231b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/necva_IE-cont-Llama3.1-8B/1762652580.3888798", - "retrieved_timestamp": "1762652580.388881", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "necva/IE-cont-Llama3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "necva/IE-cont-Llama3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20490742341431845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11668882978723404 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/netcat420_Llama3.1-MFANN-8b/aa3467df-1a74-47af-b635-0318df88dd58.json b/leaderboard_data/HFOpenLLMv2/meta/netcat420_Llama3.1-MFANN-8b/aa3467df-1a74-47af-b635-0318df88dd58.json deleted file mode 100644 index 983249bc625130ed15ced6c42f1e34825f1d79ff..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/netcat420_Llama3.1-MFANN-8b/aa3467df-1a74-47af-b635-0318df88dd58.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_Llama3.1-MFANN-8b/1762652580.3921962", - "retrieved_timestamp": "1762652580.3921971", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/Llama3.1-MFANN-8b", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/Llama3.1-MFANN-8b" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29695651981187693 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4281154680742545 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33790625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27252327127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/a9c38a44-a973-4bfd-a1f1-aa094d5e37fd.json b/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/a9c38a44-a973-4bfd-a1f1-aa094d5e37fd.json deleted file mode 100644 index 8b06083b226082e8fba508b86cd33cd4e523fd86..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/a9c38a44-a973-4bfd-a1f1-aa094d5e37fd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-TIES-V2/1762652580.3924491", - "retrieved_timestamp": "1762652580.39245", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4209796672828096 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.49237606236472237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37276041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35222739361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/e5a71267-56c7-418a-bfcc-b4b5ed10496e.json b/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/e5a71267-56c7-418a-bfcc-b4b5ed10496e.json deleted file mode 100644 index c11689bc3c27c09da17282d49d5ed2a75d8faf94..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/e5a71267-56c7-418a-bfcc-b4b5ed10496e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-TIES-V3/1762652580.3926558", - "retrieved_timestamp": "1762652580.3926558", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4238021782204551 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4914021594225444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37406249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34898603723404253 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-SLERP-V4/12a56879-c48c-4422-bc6f-fad813c94414.json b/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-SLERP-V4/12a56879-c48c-4422-bc6f-fad813c94414.json deleted file mode 100644 index 17effa7de6597a6057818e60d437a3d39ab78cb3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-SLERP-V4/12a56879-c48c-4422-bc6f-fad813c94414.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-V4/1762652580.39286", - "retrieved_timestamp": "1762652580.392861", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41688275996577967 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4908971108837563 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38209374999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35164561170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-SLERP-V5/d52d6e93-b291-4f21-aca7-2c8d48313dec.json b/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-SLERP-V5/d52d6e93-b291-4f21-aca7-2c8d48313dec.json deleted file mode 100644 index 62b2d4d3060957b778ce88c4e6b3b1eee665a754..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-SLERP-V5/d52d6e93-b291-4f21-aca7-2c8d48313dec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-SLERP-V5/1762652580.393064", - "retrieved_timestamp": "1762652580.393065", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4328947193446721 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4951892200623516 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3444980053191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-Slerp-TIES/c5a71d25-35f7-453e-9551-7881046fdeff.json b/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-Slerp-TIES/c5a71d25-35f7-453e-9551-7881046fdeff.json deleted file mode 
100644 index 1f285238457a43b689f51f52f7b991dc61d64fcc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-Slerp-TIES/c5a71d25-35f7-453e-9551-7881046fdeff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-Slerp-TIES/1762652580.393313", - "retrieved_timestamp": "1762652580.393313", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42934746472692453 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49675121796238325 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3531416223404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-Slerp-V3.2/1ef7ee4e-ab54-4e5a-b27f-4d6aeffd3f54.json b/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-Slerp-V3.2/1ef7ee4e-ab54-4e5a-b27f-4d6aeffd3f54.json deleted file mode 100644 index 098ad84d8bcaeaf7c241ce1111f73d468201e01b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-Llama3.1-Abliterated-Slerp-V3.2/1ef7ee4e-ab54-4e5a-b27f-4d6aeffd3f54.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-Llama3.1-Abliterated-Slerp-V3.2/1762652580.3935192", - "retrieved_timestamp": "1762652580.39352", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41281134057633745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49782535474346185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37542708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527260638297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-llama3.1-Abliterated-SLERP/3d3862a4-79df-488c-8d17-dc332fa3abce.json b/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-llama3.1-Abliterated-SLERP/3d3862a4-79df-488c-8d17-dc332fa3abce.json deleted file mode 100644 index bedbff83ca7d4807bbe0fc4957d04b8aa69b03a8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-llama3.1-Abliterated-SLERP/3d3862a4-79df-488c-8d17-dc332fa3abce.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-Abliterated-SLERP/1762652580.394179", - "retrieved_timestamp": "1762652580.39418", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN-llama3.1-Abliterated-SLERP", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-llama3.1-Abliterated-SLERP" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25906262051357065 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45744999460878283 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3809166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2928025265957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-llama3.1-abliterated-SLERP-v3.1/71e87ce8-88f2-4858-b65f-9225f59cc3f9.json b/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-llama3.1-abliterated-SLERP-v3.1/71e87ce8-88f2-4858-b65f-9225f59cc3f9.json deleted file mode 100644 index 23e2ebd215ecdc3f6c91845d4ae37ed917525241..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-llama3.1-abliterated-SLERP-v3.1/71e87ce8-88f2-4858-b65f-9225f59cc3f9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-abliterated-SLERP-v3.1/1762652580.394599", - "retrieved_timestamp": "1762652580.3946", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4201551882338861 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.492068920606988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3543051861702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-llama3.1-abliterated-SLERP-v3/73f2659d-ff95-403f-99e0-09de7c807c3c.json b/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-llama3.1-abliterated-SLERP-v3/73f2659d-ff95-403f-99e0-09de7c807c3c.json deleted file mode 100644 index c2846d274ceb948ed4b2cec61485c7f18f8dd8a3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-llama3.1-abliterated-SLERP-v3/73f2659d-ff95-403f-99e0-09de7c807c3c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-abliterated-SLERP-v3/1762652580.394387", - "retrieved_timestamp": "1762652580.394388", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37993856301280604 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49305765460927126 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35305851063829785 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-llama3.1-abliterated-v2/46728c83-957a-4eb7-8a04-0fee4efe50d1.json b/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-llama3.1-abliterated-v2/46728c83-957a-4eb7-8a04-0fee4efe50d1.json deleted file mode 100644 index 300d4c5a0e6870baae6370deff371dc7bd5d9160..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/netcat420_MFANN-llama3.1-abliterated-v2/46728c83-957a-4eb7-8a04-0fee4efe50d1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-llama3.1-abliterated-v2/1762652580.3948102", - "retrieved_timestamp": "1762652580.394811", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN-llama3.1-abliterated-v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "netcat420/MFANN-llama3.1-abliterated-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4429114748866341 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4940829733015402 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3845416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3490691489361702 - } 
- } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/ngxson_MiniThinky-1B-Llama-3.2/3a05547d-850b-42b5-978d-0aff574cb5ca.json b/leaderboard_data/HFOpenLLMv2/meta/ngxson_MiniThinky-1B-Llama-3.2/3a05547d-850b-42b5-978d-0aff574cb5ca.json deleted file mode 100644 index 7a4062a169e87aec0bb6b54183e7a6b7e9332ed0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/ngxson_MiniThinky-1B-Llama-3.2/3a05547d-850b-42b5-978d-0aff574cb5ca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ngxson_MiniThinky-1B-Llama-3.2/1762652580.4050229", - "retrieved_timestamp": "1762652580.4050229", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ngxson/MiniThinky-1B-Llama-3.2", - "developer": "meta", - "inference_platform": "unknown", - "id": "ngxson/MiniThinky-1B-Llama-3.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2771479673931834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31422650382721545 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34336458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/ngxson_MiniThinky-v2-1B-Llama-3.2/f37d1682-5df9-45dc-92ae-6bf587a03e9b.json b/leaderboard_data/HFOpenLLMv2/meta/ngxson_MiniThinky-v2-1B-Llama-3.2/f37d1682-5df9-45dc-92ae-6bf587a03e9b.json deleted file mode 100644 index 24296b7386c2012a4fd6ef5d76a4921c9c79338e..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/ngxson_MiniThinky-v2-1B-Llama-3.2/f37d1682-5df9-45dc-92ae-6bf587a03e9b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ngxson_MiniThinky-v2-1B-Llama-3.2/1762652580.405281", - "retrieved_timestamp": "1762652580.405282", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ngxson/MiniThinky-v2-1B-Llama-3.2", - "developer": "meta", - "inference_platform": "unknown", - "id": "ngxson/MiniThinky-v2-1B-Llama-3.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2963071317437732 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32051111358951634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3356145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1116190159574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/noname0202_llama-math-1b-r16-0to512tokens-test/8fb0f696-49a8-4611-ad82-3b7e19d5d867.json b/leaderboard_data/HFOpenLLMv2/meta/noname0202_llama-math-1b-r16-0to512tokens-test/8fb0f696-49a8-4611-ad82-3b7e19d5d867.json deleted file mode 100644 index 909aeaff2fad5a53ba4d98e9c559c862ce928c88..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/noname0202_llama-math-1b-r16-0to512tokens-test/8fb0f696-49a8-4611-ad82-3b7e19d5d867.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r16-0to512tokens-test/1762652580.4104571", - "retrieved_timestamp": "1762652580.410458", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "noname0202/llama-math-1b-r16-0to512tokens-test", - "developer": "meta", - "inference_platform": "unknown", - "id": "noname0202/llama-math-1b-r16-0to512tokens-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5469753587148765 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34884166022601404 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3143125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17278922872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/noname0202_llama-math-1b-r32-0to512tokens-test/5623295c-0170-4832-b3e9-df00c660c59b.json b/leaderboard_data/HFOpenLLMv2/meta/noname0202_llama-math-1b-r32-0to512tokens-test/5623295c-0170-4832-b3e9-df00c660c59b.json deleted file mode 100644 index 413ac1a671e77a5120b5724d378367a8d4e504ee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/noname0202_llama-math-1b-r32-0to512tokens-test/5623295c-0170-4832-b3e9-df00c660c59b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r32-0to512tokens-test/1762652580.410711", - "retrieved_timestamp": "1762652580.410711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "noname0202/llama-math-1b-r32-0to512tokens-test", - "developer": "meta", - "inference_platform": "unknown", - "id": "noname0202/llama-math-1b-r32-0to512tokens-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5682577782505973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3495183139510159 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32094791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17603058510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/noname0202_llama-math-1b-r32-test/6c3ed9db-730c-48cb-95f9-662467957403.json b/leaderboard_data/HFOpenLLMv2/meta/noname0202_llama-math-1b-r32-test/6c3ed9db-730c-48cb-95f9-662467957403.json deleted file mode 100644 index 8262d7c62c35f0e4b65796cf13a7cd050f13ad9e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/noname0202_llama-math-1b-r32-test/6c3ed9db-730c-48cb-95f9-662467957403.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r32-test/1762652580.410917", - "retrieved_timestamp": "1762652580.410918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "noname0202/llama-math-1b-r32-test", - "developer": "meta", - "inference_platform": "unknown", - "id": "noname0202/llama-math-1b-r32-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5819215237791282 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3485960127764988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31564583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17810837765957446 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/noname0202_llama-math-1b-r8-512tokens-test/c9d6f048-95b8-44ea-9d17-9d9f2d4854b4.json b/leaderboard_data/HFOpenLLMv2/meta/noname0202_llama-math-1b-r8-512tokens-test/c9d6f048-95b8-44ea-9d17-9d9f2d4854b4.json deleted file mode 100644 index 6d15fe9e5c55e6a07b7c8e9f7206ad8deab4785f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/noname0202_llama-math-1b-r8-512tokens-test/c9d6f048-95b8-44ea-9d17-9d9f2d4854b4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/noname0202_llama-math-1b-r8-512tokens-test/1762652580.411124", - "retrieved_timestamp": "1762652580.411125", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "noname0202/llama-math-1b-r8-512tokens-test", - "developer": "meta", - "inference_platform": "unknown", - "id": "noname0202/llama-math-1b-r8-512tokens-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5791987482103043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3495762462148306 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31694791666666666 - 
} - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17528257978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/nvidia_Llama-3.1-Minitron-4B-Depth-Base/98402d5d-95a6-4f48-9745-8653b298b48e.json b/leaderboard_data/HFOpenLLMv2/meta/nvidia_Llama-3.1-Minitron-4B-Depth-Base/98402d5d-95a6-4f48-9745-8653b298b48e.json deleted file mode 100644 index ae59004dea16c9fbb7fdcbb3286e35e386ea8475..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/nvidia_Llama-3.1-Minitron-4B-Depth-Base/98402d5d-95a6-4f48-9745-8653b298b48e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_Llama-3.1-Minitron-4B-Depth-Base/1762652580.4147708", - "retrieved_timestamp": "1762652580.414772", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/Llama-3.1-Minitron-4B-Depth-Base", - "developer": "meta", - "inference_platform": "unknown", - "id": "nvidia/Llama-3.1-Minitron-4B-Depth-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16069362624502986 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4170704193104893 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40106250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2798371010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.02 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/nvidia_OpenMath2-Llama3.1-8B/31c103fc-22ab-44a0-aeaf-769a9ff803df.json 
b/leaderboard_data/HFOpenLLMv2/meta/nvidia_OpenMath2-Llama3.1-8B/31c103fc-22ab-44a0-aeaf-769a9ff803df.json deleted file mode 100644 index 07ff88074bb1c5869357b19e57cf0a2863aec686..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/nvidia_OpenMath2-Llama3.1-8B/31c103fc-22ab-44a0-aeaf-769a9ff803df.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_OpenMath2-Llama3.1-8B/1762652580.416384", - "retrieved_timestamp": "1762652580.416384", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/OpenMath2-Llama3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "nvidia/OpenMath2-Llama3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23305939352030391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40955241401694514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34355208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15533577127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/ontocord_Llama_3.2_1b-autoredteam_helpfulness-train/8277cf4f-865b-4b3e-afcb-b906064dfc20.json b/leaderboard_data/HFOpenLLMv2/meta/ontocord_Llama_3.2_1b-autoredteam_helpfulness-train/8277cf4f-865b-4b3e-afcb-b906064dfc20.json deleted file mode 100644 index a21fde5c4b168cb81608d293cb855ecc24637b00..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/ontocord_Llama_3.2_1b-autoredteam_helpfulness-train/8277cf4f-865b-4b3e-afcb-b906064dfc20.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_Llama_3.2_1b-autoredteam_helpfulness-train/1762652580.417561", - "retrieved_timestamp": "1762652580.417561", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/Llama_3.2_1b-autoredteam_helpfulness-train", - "developer": "meta", - "inference_platform": "unknown", - "id": "ontocord/Llama_3.2_1b-autoredteam_helpfulness-train" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2765484470094904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31150775306414563 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.345875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11319813829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.498 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/oopere_Llama-FinSent-S/8b9ec467-1555-415c-b1ee-23be18ded9e5.json b/leaderboard_data/HFOpenLLMv2/meta/oopere_Llama-FinSent-S/8b9ec467-1555-415c-b1ee-23be18ded9e5.json deleted file mode 100644 index 67b46bbba8deed57860ba45f6a1793bd54ea256b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/oopere_Llama-FinSent-S/8b9ec467-1555-415c-b1ee-23be18ded9e5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/oopere_Llama-FinSent-S/1762652580.4263492", - "retrieved_timestamp": "1762652580.42635", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "oopere/Llama-FinSent-S", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/Llama-FinSent-S" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy 
on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2163980460733077 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3169254117559263 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11336436170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.914 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/oopere_Llama-FinSent-S/f99bad90-e7b2-4205-9f51-93f96e90188c.json b/leaderboard_data/HFOpenLLMv2/meta/oopere_Llama-FinSent-S/f99bad90-e7b2-4205-9f51-93f96e90188c.json deleted file mode 100644 index 7f183eed53dd18c58d0716461d138dd68cb4493c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/oopere_Llama-FinSent-S/f99bad90-e7b2-4205-9f51-93f96e90188c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/oopere_Llama-FinSent-S/1762652580.426095", - "retrieved_timestamp": "1762652580.426095", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "oopere/Llama-FinSent-S", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/Llama-FinSent-S" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21187670935340452 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31562055310321474 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3832395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11303191489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.914 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned10-llama-3.2-3B/2ff7d218-348b-4069-808f-6b32e7a77a5b.json b/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned10-llama-3.2-3B/2ff7d218-348b-4069-808f-6b32e7a77a5b.json deleted file mode 100644 index 57ecad51c996ce360df0d8a9b5f12a47a2b8dea0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned10-llama-3.2-3B/2ff7d218-348b-4069-808f-6b32e7a77a5b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/oopere_pruned10-llama-3.2-3B/1762652580.426529", - "retrieved_timestamp": "1762652580.4265301", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "oopere/pruned10-llama-3.2-3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned10-llama-3.2-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17762980004166723 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3340421117164456 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3721666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16397938829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.001 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned20-llama-1b/c86ed5b4-8793-424a-a5a2-9a54689cb388.json b/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned20-llama-1b/c86ed5b4-8793-424a-a5a2-9a54689cb388.json deleted file mode 100644 index effa467602897163c2b7d48176918fecb48d9c5c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned20-llama-1b/c86ed5b4-8793-424a-a5a2-9a54689cb388.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/oopere_pruned20-llama-1b/1762652580.426731", - "retrieved_timestamp": "1762652580.426732", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "oopere/pruned20-llama-1b", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned20-llama-1b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19936213690784896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30313627830972034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36314583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.075 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned20-llama-3.2-3b/e0e6bdbd-91c2-4d45-be73-03890ed13709.json b/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned20-llama-3.2-3b/e0e6bdbd-91c2-4d45-be73-03890ed13709.json deleted file mode 100644 index 
b0a9fd454913813fca63395fb0e978cd01ba5412..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned20-llama-3.2-3b/e0e6bdbd-91c2-4d45-be73-03890ed13709.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/oopere_pruned20-llama-3.2-3b/1762652580.4269419", - "retrieved_timestamp": "1762652580.426943", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "oopere/pruned20-llama-3.2-3b", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned20-llama-3.2-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17887870849346402 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32478483912909756 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34184375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12799202127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.79 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned40-llama-1b/0032ea65-98dc-48a9-90e7-835e389acecd.json b/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned40-llama-1b/0032ea65-98dc-48a9-90e7-835e389acecd.json deleted file mode 100644 index 3c37e7dd7b635829dbe16d2c37b82870eb233a30..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned40-llama-1b/0032ea65-98dc-48a9-90e7-835e389acecd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/oopere_pruned40-llama-1b/1762652580.427145", - "retrieved_timestamp": "1762652580.427145", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "oopere/pruned40-llama-1b", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned40-llama-1b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22843832143157933 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29691563801419935 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10821143617021277 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.914 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned40-llama-3.2-1B/bae27b4d-4046-45f1-b798-8356fa962df4.json b/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned40-llama-3.2-1B/bae27b4d-4046-45f1-b798-8356fa962df4.json deleted file mode 100644 index 19cf0bb382663f4b54c6f90e0ab8ba00c0e4d604..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned40-llama-3.2-1B/bae27b4d-4046-45f1-b798-8356fa962df4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/oopere_pruned40-llama-3.2-1B/1762652580.427387", - "retrieved_timestamp": "1762652580.427387", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "oopere/pruned40-llama-3.2-1B", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned40-llama-3.2-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22663976028050017 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2982489713475327 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43523958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11145279255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.914 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned40-llama-3.2-3b/97c9b209-b2ed-439f-9b01-cad25e205fa9.json b/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned40-llama-3.2-3b/97c9b209-b2ed-439f-9b01-cad25e205fa9.json deleted file mode 100644 index c5d52fea595dab5053e4d92404c55bdb6a4e56f0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned40-llama-3.2-3b/97c9b209-b2ed-439f-9b01-cad25e205fa9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/oopere_pruned40-llama-3.2-3b/1762652580.4275908", - "retrieved_timestamp": "1762652580.4275908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "oopere/pruned40-llama-3.2-3b", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned40-llama-3.2-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21829634259320824 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31671170280977073 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3539375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11768617021276596 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 2.367 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned60-llama-1b/4c0ac526-821a-49eb-9eee-152d594ed25b.json b/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned60-llama-1b/4c0ac526-821a-49eb-9eee-152d594ed25b.json deleted file mode 100644 index 787cc1884fbe3f5e2c129ac4766ff8982dff22a3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned60-llama-1b/4c0ac526-821a-49eb-9eee-152d594ed25b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/oopere_pruned60-llama-1b/1762652580.4277859", - "retrieved_timestamp": "1762652580.4277859", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "oopere/pruned60-llama-1b", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned60-llama-1b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18285039251408486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3016193474185398 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40879166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11727061170212766 - } - } - ], - "additional_details": 
{ - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.753 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned60-llama-3.2-3b/219c6f49-3d48-4e1b-8105-fdf323b2fc3c.json b/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned60-llama-3.2-3b/219c6f49-3d48-4e1b-8105-fdf323b2fc3c.json deleted file mode 100644 index cb3ab8472df784893dea05f76593ff173f3ef854..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/oopere_pruned60-llama-3.2-3b/219c6f49-3d48-4e1b-8105-fdf323b2fc3c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/oopere_pruned60-llama-3.2-3b/1762652580.42798", - "retrieved_timestamp": "1762652580.4279811", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "oopere/pruned60-llama-3.2-3b", - "developer": "meta", - "inference_platform": "unknown", - "id": "oopere/pruned60-llama-3.2-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1824758307956223 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31662597093352013 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3633333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11311502659574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.944 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/orai-nlp_Llama-eus-8B/0ed99007-3e31-4c48-abe5-0cd94b95dcf4.json b/leaderboard_data/HFOpenLLMv2/meta/orai-nlp_Llama-eus-8B/0ed99007-3e31-4c48-abe5-0cd94b95dcf4.json deleted file mode 100644 index 56c8e1b91e7c77df33641a2495cbf09f2a0df1c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/orai-nlp_Llama-eus-8B/0ed99007-3e31-4c48-abe5-0cd94b95dcf4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/orai-nlp_Llama-eus-8B/1762652580.43225", - "retrieved_timestamp": "1762652580.432275", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "orai-nlp/Llama-eus-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "orai-nlp/Llama-eus-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21612321972366655 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4418245490788701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3918854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30576795212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/princeton-nlp_Llama-3-8B-ProLong-512k-Base/6c3d4b07-14c5-4218-862f-2aca386f5144.json b/leaderboard_data/HFOpenLLMv2/meta/princeton-nlp_Llama-3-8B-ProLong-512k-Base/6c3d4b07-14c5-4218-862f-2aca386f5144.json deleted file mode 100644 index 0b8983a5b77782bfc3a8ca5eae6bd6c6ff142016..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/princeton-nlp_Llama-3-8B-ProLong-512k-Base/6c3d4b07-14c5-4218-862f-2aca386f5144.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-512k-Base/1762652580.442863", - "retrieved_timestamp": "1762652580.4428642", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-8B-ProLong-512k-Base", - "developer": "meta", - "inference_platform": 
"unknown", - "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5322123077877808 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033213133882991 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4222708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33294547872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/princeton-nlp_Llama-3-8B-ProLong-64k-Base/171a1779-0f17-4514-96ae-e4f9acea86b4.json b/leaderboard_data/HFOpenLLMv2/meta/princeton-nlp_Llama-3-8B-ProLong-64k-Base/171a1779-0f17-4514-96ae-e4f9acea86b4.json deleted file mode 100644 index 743518d9c403189cac00265ed8e650e2b56f4d4f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/princeton-nlp_Llama-3-8B-ProLong-64k-Base/171a1779-0f17-4514-96ae-e4f9acea86b4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-64k-Base/1762652580.443676", - "retrieved_timestamp": "1762652580.443677", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-8B-ProLong-64k-Base", - "developer": "meta", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-8B-ProLong-64k-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5200722970606879 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 
- }, - "score_details": { - "score": 0.49271325981523906 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4340520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347739361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/princeton-nlp_Sheared-LLaMA-1.3B/578905fb-a4a6-4dcd-9b09-ff5289568b91.json b/leaderboard_data/HFOpenLLMv2/meta/princeton-nlp_Sheared-LLaMA-1.3B/578905fb-a4a6-4dcd-9b09-ff5289568b91.json deleted file mode 100644 index 8c448d76e8e143fef435e7e6902ca5a2d62b1fd1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/princeton-nlp_Sheared-LLaMA-1.3B/578905fb-a4a6-4dcd-9b09-ff5289568b91.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Sheared-LLaMA-1.3B/1762652580.4538639", - "retrieved_timestamp": "1762652580.453865", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Sheared-LLaMA-1.3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "princeton-nlp/Sheared-LLaMA-1.3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2197702097102355 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31970467392464424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, 
- { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11710438829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.3 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/princeton-nlp_Sheared-LLaMA-2.7B/3a0252c3-ced9-4cb4-94ef-d3800ac15ff9.json b/leaderboard_data/HFOpenLLMv2/meta/princeton-nlp_Sheared-LLaMA-2.7B/3a0252c3-ced9-4cb4-94ef-d3800ac15ff9.json deleted file mode 100644 index 4aba6265aae1c215c16a11f10ea22e4343082f24..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/princeton-nlp_Sheared-LLaMA-2.7B/3a0252c3-ced9-4cb4-94ef-d3800ac15ff9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Sheared-LLaMA-2.7B/1762652580.4540951", - "retrieved_timestamp": "1762652580.4540958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Sheared-LLaMA-2.7B", - "developer": "meta", - "inference_platform": "unknown", - "id": "princeton-nlp/Sheared-LLaMA-2.7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24165214962964932 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32586855691245953 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11868351063829788 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 2.7 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Deepthink-Llama-3-8B-Preview/020f77a1-1051-4f85-8037-ed4f8b12474a.json b/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Deepthink-Llama-3-8B-Preview/020f77a1-1051-4f85-8037-ed4f8b12474a.json deleted file mode 100644 index f27f7379cd08ce91bac85a7e2f6cc442d66be477..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Deepthink-Llama-3-8B-Preview/020f77a1-1051-4f85-8037-ed4f8b12474a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Deepthink-Llama-3-8B-Preview/1762652580.459939", - "retrieved_timestamp": "1762652580.459939", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Deepthink-Llama-3-8B-Preview", - "developer": "meta", - "inference_platform": "unknown", - "id": "prithivMLmods/Deepthink-Llama-3-8B-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29553252037926037 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4664510845126107 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3549848942598187 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37070833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2738530585106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-3.2-3B-Math-Oct/5ab1b41f-ee87-475c-b48b-e154c580d560.json b/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-3.2-3B-Math-Oct/5ab1b41f-ee87-475c-b48b-e154c580d560.json deleted file mode 100644 index a9c5ee4dfee528f201a9ff083cb37e362cdc35c1..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-3.2-3B-Math-Oct/5ab1b41f-ee87-475c-b48b-e154c580d560.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.2-3B-Math-Oct/1762652580.464829", - "retrieved_timestamp": "1762652580.46483", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Llama-3.2-3B-Math-Oct", - "developer": "meta", - "inference_platform": "unknown", - "id": "prithivMLmods/Llama-3.2-3B-Math-Oct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4585233846194763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4371840952508727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34698958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911402925531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-3.2-6B-AlgoCode/914b588e-6da8-4a08-9313-ac7004fd8b97.json b/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-3.2-6B-AlgoCode/914b588e-6da8-4a08-9313-ac7004fd8b97.json deleted file mode 100644 index b30f58f367d206340f4fd94d951d248669171a33..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-3.2-6B-AlgoCode/914b588e-6da8-4a08-9313-ac7004fd8b97.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.2-6B-AlgoCode/1762652580.465046", - "retrieved_timestamp": "1762652580.465046", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Llama-3.2-6B-AlgoCode", - "developer": "meta", - "inference_platform": "unknown", - "id": "prithivMLmods/Llama-3.2-6B-AlgoCode" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21357553513566227 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37477424449567703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2869127516778524 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40134374999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17977061170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.339 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-8B-Distill-CoT/6b1d1057-0091-4e44-822f-f7c1e5dc3ce9.json b/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-8B-Distill-CoT/6b1d1057-0091-4e44-822f-f7c1e5dc3ce9.json deleted file mode 100644 index 9ffbb93561bb4d13fa88ba76a6b9ff31f9941987..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-8B-Distill-CoT/6b1d1057-0091-4e44-822f-f7c1e5dc3ce9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-8B-Distill-CoT/1762652580.465258", - "retrieved_timestamp": "1762652580.465258", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Llama-8B-Distill-CoT", - "developer": "meta", - "inference_platform": "unknown", - "id": "prithivMLmods/Llama-8B-Distill-CoT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3341511633576688 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4297620873695442 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3719791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.273188164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-Deepsync-1B/5516c5d6-29c9-46dc-ae29-61876fb488c2.json b/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-Deepsync-1B/5516c5d6-29c9-46dc-ae29-61876fb488c2.json deleted file mode 100644 index 8f3dce9cea8cbff135cda04ac38ea582e0de3784..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-Deepsync-1B/5516c5d6-29c9-46dc-ae29-61876fb488c2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-Deepsync-1B/1762652580.4655502", - "retrieved_timestamp": "1762652580.4655511", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Llama-Deepsync-1B", - "developer": "meta", - "inference_platform": "unknown", - "id": "prithivMLmods/Llama-Deepsync-1B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3570071853792382 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33856262083940014 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35651041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17378656914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-Deepsync-3B/fbdcf318-d1b5-4ed6-b13d-efb14dfaf09f.json b/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-Deepsync-3B/fbdcf318-d1b5-4ed6-b13d-efb14dfaf09f.json deleted file mode 100644 index 7cad03365a64c69ecb30fbe3760d953b23513506..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-Deepsync-3B/fbdcf318-d1b5-4ed6-b13d-efb14dfaf09f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-Deepsync-3B/1762652580.465787", - "retrieved_timestamp": "1762652580.465788", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Llama-Deepsync-3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "prithivMLmods/Llama-Deepsync-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4302218114602588 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4291521655271033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33238541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3031083776595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-Express.1-Math/99fd40d7-8d26-4088-ba03-1c1d7ed11ca0.json b/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-Express.1-Math/99fd40d7-8d26-4088-ba03-1c1d7ed11ca0.json deleted file mode 100644 index c67cac18a7d41ae1c7983461244c3bf66131cfc3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/prithivMLmods_Llama-Express.1-Math/99fd40d7-8d26-4088-ba03-1c1d7ed11ca0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-Express.1-Math/1762652580.466016", - "retrieved_timestamp": "1762652580.466017", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Llama-Express.1-Math", - "developer": "meta", - "inference_platform": "unknown", - "id": "prithivMLmods/Llama-Express.1-Math" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5084320713484665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33638140090435265 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.055891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31434375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16098736702127658 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/pszemraj_Llama-3-6.3b-v0.1/74260e1f-8b2d-40ac-ac96-f268d65fa838.json b/leaderboard_data/HFOpenLLMv2/meta/pszemraj_Llama-3-6.3b-v0.1/74260e1f-8b2d-40ac-ac96-f268d65fa838.json deleted file mode 100644 index f3d5f02d081189e8961dc49ba9d75e570e8ca5e6..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/pszemraj_Llama-3-6.3b-v0.1/74260e1f-8b2d-40ac-ac96-f268d65fa838.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pszemraj_Llama-3-6.3b-v0.1/1762652580.4812942", - "retrieved_timestamp": "1762652580.481295", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pszemraj/Llama-3-6.3b-v0.1", - "developer": "meta", - "inference_platform": "unknown", - "id": "pszemraj/Llama-3-6.3b-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10438968603305895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41968070468284147 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3908333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2839926861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.3 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/qingy2019_LLaMa_3.2_3B_Catalysts/2fb27531-96ee-48d2-9416-43ef790d7196.json b/leaderboard_data/HFOpenLLMv2/meta/qingy2019_LLaMa_3.2_3B_Catalysts/2fb27531-96ee-48d2-9416-43ef790d7196.json deleted file mode 100644 index c6724893c05777cab64287d77581be870f1a1852..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/qingy2019_LLaMa_3.2_3B_Catalysts/2fb27531-96ee-48d2-9416-43ef790d7196.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2019_LLaMa_3.2_3B_Catalysts/1762652580.4818308", - "retrieved_timestamp": "1762652580.481832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2019/LLaMa_3.2_3B_Catalysts", - "developer": "meta", - "inference_platform": "unknown", - "id": "qingy2019/LLaMa_3.2_3B_Catalysts" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.499239794855428 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44681268798954793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12915407854984895 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37877083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30078125 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/qingy2019_OpenMath2-Llama3.1-8B/75da6225-cc30-480c-b33e-359648932d9d.json b/leaderboard_data/HFOpenLLMv2/meta/qingy2019_OpenMath2-Llama3.1-8B/75da6225-cc30-480c-b33e-359648932d9d.json deleted file mode 100644 index 046aab852f31c65457d1248cc553e84031ad28a9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/qingy2019_OpenMath2-Llama3.1-8B/75da6225-cc30-480c-b33e-359648932d9d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2019_OpenMath2-Llama3.1-8B/1762652580.482083", - "retrieved_timestamp": "1762652580.482084", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2019/OpenMath2-Llama3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "qingy2019/OpenMath2-Llama3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23305939352030391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40955241401694514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2673716012084592 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34355208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15533577127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/refuelai_Llama-3-Refueled/2f104869-3a3b-4d25-987b-77dba089b817.json b/leaderboard_data/HFOpenLLMv2/meta/refuelai_Llama-3-Refueled/2f104869-3a3b-4d25-987b-77dba089b817.json deleted file mode 100644 index 32ded2d851cd0ef93046f3517ec5f86aba791905..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/refuelai_Llama-3-Refueled/2f104869-3a3b-4d25-987b-77dba089b817.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/refuelai_Llama-3-Refueled/1762652580.494146", - "retrieved_timestamp": "1762652580.494147", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "refuelai/Llama-3-Refueled", - "developer": "meta", - "inference_platform": "unknown", - "id": "refuelai/Llama-3-Refueled" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4619952836252255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5870766201705051 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30950797872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/riaz_FineLlama-3.1-8B/55eb0438-f0bd-4f9d-8bff-577d0245a57c.json b/leaderboard_data/HFOpenLLMv2/meta/riaz_FineLlama-3.1-8B/55eb0438-f0bd-4f9d-8bff-577d0245a57c.json deleted file mode 100644 index 5b07ceb8e54c7ca0464e394bbe978c910332600b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/riaz_FineLlama-3.1-8B/55eb0438-f0bd-4f9d-8bff-577d0245a57c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/riaz_FineLlama-3.1-8B/1762652580.495657", - "retrieved_timestamp": "1762652580.495657", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "riaz/FineLlama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "riaz/FineLlama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43734070045257695 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45857296498013483 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3762916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29637632978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/riaz_FineLlama-3.1-8B/d5fb7571-bafd-424a-87f5-2d14ac7bd8d2.json b/leaderboard_data/HFOpenLLMv2/meta/riaz_FineLlama-3.1-8B/d5fb7571-bafd-424a-87f5-2d14ac7bd8d2.json deleted file mode 100644 index 608ac87d5165e6458a7acd5f633644d0308206c9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/riaz_FineLlama-3.1-8B/d5fb7571-bafd-424a-87f5-2d14ac7bd8d2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/riaz_FineLlama-3.1-8B/1762652580.4959512", - "retrieved_timestamp": "1762652580.495952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "riaz/FineLlama-3.1-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "riaz/FineLlama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.413660199382084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.456451981676995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37762500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29778922872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/rombodawg_rombos_Replete-Coder-Llama3-8B/af3522f6-e26f-491f-8ccc-df064e5d3010.json b/leaderboard_data/HFOpenLLMv2/meta/rombodawg_rombos_Replete-Coder-Llama3-8B/af3522f6-e26f-491f-8ccc-df064e5d3010.json deleted file mode 100644 index 75ceb415d825f0c41edbc795b8dae76edcc72bfe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/rombodawg_rombos_Replete-Coder-Llama3-8B/af3522f6-e26f-491f-8ccc-df064e5d3010.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/rombodawg_rombos_Replete-Coder-Llama3-8B/1762652580.5000498", - "retrieved_timestamp": "1762652580.500051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rombodawg/rombos_Replete-Coder-Llama3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "rombodawg/rombos_Replete-Coder-Llama3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4714125187834945 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32762771025266835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39663541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13347739361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama2-7B-CPO/2ecc5d1d-edb7-4713-9bde-f83ab4736690.json b/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama2-7B-CPO/2ecc5d1d-edb7-4713-9bde-f83ab4736690.json deleted file mode 100644 index fcfc402188f9e7c828cc76b0238bfeb967daf367..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama2-7B-CPO/2ecc5d1d-edb7-4713-9bde-f83ab4736690.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-CPO/1762652580.502833", - "retrieved_timestamp": "1762652580.502836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sabersaleh/Llama2-7B-CPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersaleh/Llama2-7B-CPO" - 
}, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1545488193548673 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3457919655499851 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40482291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1605718085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama2-7B-IPO/14deb011-b6ce-47c7-b855-c7ebcc291121.json b/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama2-7B-IPO/14deb011-b6ce-47c7-b855-c7ebcc291121.json deleted file mode 100644 index 4ddde947ee9575d3416c2a57ae614698e5729bc8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama2-7B-IPO/14deb011-b6ce-47c7-b855-c7ebcc291121.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-IPO/1762652580.503558", - "retrieved_timestamp": "1762652580.5035589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sabersaleh/Llama2-7B-IPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersaleh/Llama2-7B-IPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17685518867715438 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474552716912811 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4047604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16173537234042554 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama2-7B-KTO/0744b5c6-e109-4ccb-acc9-955106ef5562.json b/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama2-7B-KTO/0744b5c6-e109-4ccb-acc9-955106ef5562.json deleted file mode 100644 index eec0d38f82e86fcea392e184fffc42162fc9806f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama2-7B-KTO/0744b5c6-e109-4ccb-acc9-955106ef5562.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-KTO/1762652580.503802", - "retrieved_timestamp": "1762652580.5038028", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sabersaleh/Llama2-7B-KTO", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersaleh/Llama2-7B-KTO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15284999357260956 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35007577568366255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.41669791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1636469414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama2-7B-SPO/cfbdbc52-d846-48e7-bad4-f6240f1d2551.json b/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama2-7B-SPO/cfbdbc52-d846-48e7-bad4-f6240f1d2551.json deleted file mode 100644 index 974df6a73286fcd63257446fc688d870784d6421..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama2-7B-SPO/cfbdbc52-d846-48e7-bad4-f6240f1d2551.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-SPO/1762652580.504033", - "retrieved_timestamp": "1762652580.504034", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sabersaleh/Llama2-7B-SPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersaleh/Llama2-7B-SPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15667207453999832 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33834029554844597 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3874270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17569813829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama2-7B-SimPO/a530f116-e413-4d73-8d1f-2f44fcc0c6a9.json 
b/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama2-7B-SimPO/a530f116-e413-4d73-8d1f-2f44fcc0c6a9.json deleted file mode 100644 index b06f6b67aa4c2b3eb3c346e1c4bf7562271dc6b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama2-7B-SimPO/a530f116-e413-4d73-8d1f-2f44fcc0c6a9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-SimPO/1762652580.504319", - "retrieved_timestamp": "1762652580.50432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sabersaleh/Llama2-7B-SimPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersaleh/Llama2-7B-SimPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1658643510330368 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34891553101294254 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40069791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16414561170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama3/286860d2-7f43-4488-9d43-9058fe59b248.json b/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama3/286860d2-7f43-4488-9d43-9058fe59b248.json deleted file mode 100644 index 53e8dbaaf4a11671d06c2703c998869411987664..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sabersaleh_Llama3/286860d2-7f43-4488-9d43-9058fe59b248.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sabersaleh_Llama3/1762652580.504582", - "retrieved_timestamp": "1762652580.504583", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sabersaleh/Llama3", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersaleh/Llama3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3320777758569484 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47821899796340944 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39334375000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.316156914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sabersalehk_Llama3-001-300/f73009ad-891e-41e7-a6bc-a271894f5511.json b/leaderboard_data/HFOpenLLMv2/meta/sabersalehk_Llama3-001-300/f73009ad-891e-41e7-a6bc-a271894f5511.json deleted file mode 100644 index 0596b6cfe46434debbfb072cf4fa47175e7aded1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sabersalehk_Llama3-001-300/f73009ad-891e-41e7-a6bc-a271894f5511.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sabersalehk_Llama3-001-300/1762652580.504826", - "retrieved_timestamp": "1762652580.504826", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sabersalehk/Llama3-001-300", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersalehk/Llama3-001-300" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3178643776291351 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47445771982516544 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40639583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3158244680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sabersalehk_Llama3-SimPO/b88f3d13-a8ed-4e23-86ec-1531c3151f0f.json b/leaderboard_data/HFOpenLLMv2/meta/sabersalehk_Llama3-SimPO/b88f3d13-a8ed-4e23-86ec-1531c3151f0f.json deleted file mode 100644 index 6fb7a5b6c59d42bb10ff3b7a72d898e6d3c08ce5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sabersalehk_Llama3-SimPO/b88f3d13-a8ed-4e23-86ec-1531c3151f0f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sabersalehk_Llama3-SimPO/1762652580.505101", - "retrieved_timestamp": "1762652580.5051022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sabersalehk/Llama3-SimPO", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersalehk/Llama3-SimPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36420142998355476 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48735382942408356 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40459375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3156582446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sabersalehk_Llama3_001_200/f673b2f9-8b77-42a3-9066-29f21a1ca0f8.json b/leaderboard_data/HFOpenLLMv2/meta/sabersalehk_Llama3_001_200/f673b2f9-8b77-42a3-9066-29f21a1ca0f8.json deleted file mode 100644 index 2a0c5b438ba7bd7b315cfd281f222dd940f26b63..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sabersalehk_Llama3_001_200/f673b2f9-8b77-42a3-9066-29f21a1ca0f8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sabersalehk_Llama3_001_200/1762652580.505313", - "retrieved_timestamp": "1762652580.505314", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sabersalehk/Llama3_001_200", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersalehk/Llama3_001_200" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.321836061649756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4727921518419169 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4037291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.31831781914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sabersalehk_Llama3_01_300/55ae7ee9-2c50-45d6-ac0e-7c07bbad9a00.json b/leaderboard_data/HFOpenLLMv2/meta/sabersalehk_Llama3_01_300/55ae7ee9-2c50-45d6-ac0e-7c07bbad9a00.json deleted file mode 100644 index 420e22a7a3bf0c5391578994409cf3bbcff10d51..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sabersalehk_Llama3_01_300/55ae7ee9-2c50-45d6-ac0e-7c07bbad9a00.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sabersalehk_Llama3_01_300/1762652580.505522", - "retrieved_timestamp": "1762652580.505523", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sabersalehk/Llama3_01_300", - "developer": "meta", - "inference_platform": "unknown", - "id": "sabersalehk/Llama3_01_300" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2958827023408999 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4691387139601247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40648958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31241688829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sakhan10_quantized_open_llama_3b_v2/f96ce5a9-7cc2-4380-9285-09052b906411.json b/leaderboard_data/HFOpenLLMv2/meta/sakhan10_quantized_open_llama_3b_v2/f96ce5a9-7cc2-4380-9285-09052b906411.json deleted file mode 100644 index 2ef1a8c79f67bc3bf4e73c1e2c0a2ffb1b1e3d0e..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/sakhan10_quantized_open_llama_3b_v2/f96ce5a9-7cc2-4380-9285-09052b906411.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sakhan10_quantized_open_llama_3b_v2/1762652580.507647", - "retrieved_timestamp": "1762652580.507648", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sakhan10/quantized_open_llama_3b_v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "sakhan10/quantized_open_llama_3b_v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18722212618075595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3019800780121471 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10954122340425532 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sequelbox_Llama3.1-70B-PlumChat/ab796471-db79-40a2-8147-72ed7099b355.json b/leaderboard_data/HFOpenLLMv2/meta/sequelbox_Llama3.1-70B-PlumChat/ab796471-db79-40a2-8147-72ed7099b355.json deleted file mode 100644 index d7715792e74d3241a986158a8e50d61f7e4d02f3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sequelbox_Llama3.1-70B-PlumChat/ab796471-db79-40a2-8147-72ed7099b355.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-70B-PlumChat/1762652580.5115242", - "retrieved_timestamp": "1762652580.5115242", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sequelbox/Llama3.1-70B-PlumChat", - "developer": "meta", - "inference_platform": "unknown", - "id": "sequelbox/Llama3.1-70B-PlumChat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5616131863455631 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6752815345736151 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028700906344411 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47737500000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.516373005319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sequelbox_Llama3.1-8B-MOTH/3a820ba4-bdd8-4caf-a90a-d7e9fee52997.json b/leaderboard_data/HFOpenLLMv2/meta/sequelbox_Llama3.1-8B-MOTH/3a820ba4-bdd8-4caf-a90a-d7e9fee52997.json deleted file mode 100644 index f07ce83457362fa6db7380d9f2dfb27be30438d0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sequelbox_Llama3.1-8B-MOTH/3a820ba4-bdd8-4caf-a90a-d7e9fee52997.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-MOTH/1762652580.511786", - "retrieved_timestamp": "1762652580.511787", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sequelbox/Llama3.1-8B-MOTH", - "developer": "meta", - "inference_platform": "unknown", - "id": "sequelbox/Llama3.1-8B-MOTH" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244938984117696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.490246673015408 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3689166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338597074468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sequelbox_Llama3.1-8B-PlumChat/32f38aeb-615c-4785-a674-bd8a50eb1057.json b/leaderboard_data/HFOpenLLMv2/meta/sequelbox_Llama3.1-8B-PlumChat/32f38aeb-615c-4785-a674-bd8a50eb1057.json deleted file mode 100644 index 4ef2105c1bb443800cbb1c04ae55a3ad2a31767c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sequelbox_Llama3.1-8B-PlumChat/32f38aeb-615c-4785-a674-bd8a50eb1057.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-PlumChat/1762652580.512009", - "retrieved_timestamp": "1762652580.51201", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sequelbox/Llama3.1-8B-PlumChat", - "developer": "meta", - "inference_platform": "unknown", - "id": "sequelbox/Llama3.1-8B-PlumChat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42427647530773904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873291395699702 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03625377643504532 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3754583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21268284574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sequelbox_Llama3.1-8B-PlumCode/2695c341-eabe-4809-9b87-9e771e1ee9d6.json b/leaderboard_data/HFOpenLLMv2/meta/sequelbox_Llama3.1-8B-PlumCode/2695c341-eabe-4809-9b87-9e771e1ee9d6.json deleted file mode 100644 index ac6c424e28152595d58fc47d23735f85d540d8cb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sequelbox_Llama3.1-8B-PlumCode/2695c341-eabe-4809-9b87-9e771e1ee9d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-PlumCode/1762652580.512235", - "retrieved_timestamp": "1762652580.512235", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sequelbox/Llama3.1-8B-PlumCode", - "developer": "meta", - "inference_platform": "unknown", - "id": "sequelbox/Llama3.1-8B-PlumCode" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20448299401144518 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3368086861425416 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37734375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23354388297872342 - } - } - ], - "additional_details": { 
- "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sequelbox_Llama3.1-8B-PlumMath/4734bf79-d464-43b4-8df3-1937f7c37796.json b/leaderboard_data/HFOpenLLMv2/meta/sequelbox_Llama3.1-8B-PlumMath/4734bf79-d464-43b4-8df3-1937f7c37796.json deleted file mode 100644 index b28e12f1794ea93b433503844aa17a3c433e8370..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sequelbox_Llama3.1-8B-PlumMath/4734bf79-d464-43b4-8df3-1937f7c37796.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sequelbox_Llama3.1-8B-PlumMath/1762652580.512456", - "retrieved_timestamp": "1762652580.512456", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sequelbox/Llama3.1-8B-PlumMath", - "developer": "meta", - "inference_platform": "unknown", - "id": "sequelbox/Llama3.1-8B-PlumMath" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.224241678745728 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40323023090048143 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39185416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29753989361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sethuiyer_LlamaZero-3.1-8B-Experimental-1208/abebe996-35e4-4fa6-a16c-0b33481d7357.json b/leaderboard_data/HFOpenLLMv2/meta/sethuiyer_LlamaZero-3.1-8B-Experimental-1208/abebe996-35e4-4fa6-a16c-0b33481d7357.json deleted file mode 100644 index 21c392f8d44abc73ec7fb4a306e3834a84767e0c..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/meta/sethuiyer_LlamaZero-3.1-8B-Experimental-1208/abebe996-35e4-4fa6-a16c-0b33481d7357.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sethuiyer_LlamaZero-3.1-8B-Experimental-1208/1762652580.5134048", - "retrieved_timestamp": "1762652580.513406", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sethuiyer/LlamaZero-3.1-8B-Experimental-1208", - "developer": "meta", - "inference_platform": "unknown", - "id": "sethuiyer/LlamaZero-3.1-8B-Experimental-1208" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6051022398347496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49813698712445653 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38199999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2999501329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/skumar9_Llama-medx_v2/1bfc4a7a-2ac8-4454-bbee-0db62608ce5a.json b/leaderboard_data/HFOpenLLMv2/meta/skumar9_Llama-medx_v2/1bfc4a7a-2ac8-4454-bbee-0db62608ce5a.json deleted file mode 100644 index 30c546fb4e147437d04c178a9e4aecad87de06cf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/skumar9_Llama-medx_v2/1bfc4a7a-2ac8-4454-bbee-0db62608ce5a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/skumar9_Llama-medx_v2/1762652580.517576", - "retrieved_timestamp": "1762652580.517576", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging 
Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "skumar9/Llama-medx_v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "skumar9/Llama-medx_v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4462337708391512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4908589512175783 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667674 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36612500000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34632646276595747 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/suayptalha_DeepSeek-R1-Distill-Llama-3B/4146ffb5-ac76-43b7-acdc-8c181f2c60d2.json b/leaderboard_data/HFOpenLLMv2/meta/suayptalha_DeepSeek-R1-Distill-Llama-3B/4146ffb5-ac76-43b7-acdc-8c181f2c60d2.json deleted file mode 100644 index 12b40f159cdc097afffff9084c801c3aac8e1841..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/suayptalha_DeepSeek-R1-Distill-Llama-3B/4146ffb5-ac76-43b7-acdc-8c181f2c60d2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/suayptalha_DeepSeek-R1-Distill-Llama-3B/1762652580.543217", - "retrieved_timestamp": "1762652580.543217", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "suayptalha/DeepSeek-R1-Distill-Llama-3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "suayptalha/DeepSeek-R1-Distill-Llama-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7092658590318134 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44517853159705956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20921450151057402 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33958333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29778922872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/suayptalha_Komodo-Llama-3.2-3B-v2-fp16/d86e291c-cc26-475c-9ccd-e3ee68e8bee2.json b/leaderboard_data/HFOpenLLMv2/meta/suayptalha_Komodo-Llama-3.2-3B-v2-fp16/d86e291c-cc26-475c-9ccd-e3ee68e8bee2.json deleted file mode 100644 index 07d8343cc33ca6686b85afca233c8481725a0446..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/suayptalha_Komodo-Llama-3.2-3B-v2-fp16/d86e291c-cc26-475c-9ccd-e3ee68e8bee2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/suayptalha_Komodo-Llama-3.2-3B-v2-fp16/1762652580.543882", - "retrieved_timestamp": "1762652580.543883", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "suayptalha/Komodo-Llama-3.2-3B-v2-fp16", - "developer": "meta", - "inference_platform": "unknown", - "id": "suayptalha/Komodo-Llama-3.2-3B-v2-fp16" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6340532010620709 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43549964909074995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34057291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523936170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sumink_flflmillama/19f198e5-37b8-4d62-8cbe-849f6875d39e.json b/leaderboard_data/HFOpenLLMv2/meta/sumink_flflmillama/19f198e5-37b8-4d62-8cbe-849f6875d39e.json deleted file mode 100644 index c1e5c5c72a42c6caa58bb79f9e918f1210025ef7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sumink_flflmillama/19f198e5-37b8-4d62-8cbe-849f6875d39e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_flflmillama/1762652580.5473018", - "retrieved_timestamp": "1762652580.5473018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/flflmillama", - "developer": "meta", - "inference_platform": "unknown", - "id": "sumink/flflmillama" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16756317681529453 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38511286094747693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35911458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.20960771276595744 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sumink_llamaft/a13b4873-22c0-461a-b4ba-41246ede0dfa.json b/leaderboard_data/HFOpenLLMv2/meta/sumink_llamaft/a13b4873-22c0-461a-b4ba-41246ede0dfa.json deleted file mode 100644 index f1dfe249c2351058e5f424c1e5bab38462ec3b42..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sumink_llamaft/a13b4873-22c0-461a-b4ba-41246ede0dfa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_llamaft/1762652580.547796", - "retrieved_timestamp": "1762652580.547797", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/llamaft", - "developer": "meta", - "inference_platform": "unknown", - "id": "sumink/llamaft" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16086871722584964 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3762775648269859 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3498125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21143617021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/sumink_llamamerge/f7406d3e-dbfa-4f12-946e-f4e58c728fa8.json b/leaderboard_data/HFOpenLLMv2/meta/sumink_llamamerge/f7406d3e-dbfa-4f12-946e-f4e58c728fa8.json deleted file mode 100644 index d7b6ae7f17560fbf3ef561092980802a07aec1b6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/sumink_llamamerge/f7406d3e-dbfa-4f12-946e-f4e58c728fa8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/sumink_llamamerge/1762652580.547998", - "retrieved_timestamp": "1762652580.547999", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/llamamerge", - "developer": "meta", - "inference_platform": "unknown", - "id": "sumink/llamamerge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26718107953563214 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46316160070587903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42397916666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2589760638297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/tenyx_Llama3-TenyxChat-70B/6fc094c0-ca29-4594-b086-2dae90195e8d.json b/leaderboard_data/HFOpenLLMv2/meta/tenyx_Llama3-TenyxChat-70B/6fc094c0-ca29-4594-b086-2dae90195e8d.json deleted file mode 100644 index 088c9e1af4fb80a19b48999e2c18be6ec9904b69..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/tenyx_Llama3-TenyxChat-70B/6fc094c0-ca29-4594-b086-2dae90195e8d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tenyx_Llama3-TenyxChat-70B/1762652580.5593112", - "retrieved_timestamp": "1762652580.5593119", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tenyx/Llama3-TenyxChat-70B", - "developer": "meta", - "inference_platform": "unknown", - "id": "tenyx/Llama3-TenyxChat-70B" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8087086707713311 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6511486901811531 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23564954682779457 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5210272606382979 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/theprint_CleverBoi-Llama-3.1-8B-v2/42ea4b8d-98af-4c57-8b55-cef38c473fd5.json b/leaderboard_data/HFOpenLLMv2/meta/theprint_CleverBoi-Llama-3.1-8B-v2/42ea4b8d-98af-4c57-8b55-cef38c473fd5.json deleted file mode 100644 index 10f382f3ce149abea580d9cadfb8c2b9021afb3e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/theprint_CleverBoi-Llama-3.1-8B-v2/42ea4b8d-98af-4c57-8b55-cef38c473fd5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-Llama-3.1-8B-v2/1762652580.560884", - "retrieved_timestamp": "1762652580.560884", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/CleverBoi-Llama-3.1-8B-v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "theprint/CleverBoi-Llama-3.1-8B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19613957632415324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46678160110644784 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": 
"Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37346875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31881648936170215 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 9.3 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/theprint_Code-Llama-Bagel-8B/3a63b21d-0aaa-45d5-ae12-6d6c9777edbe.json b/leaderboard_data/HFOpenLLMv2/meta/theprint_Code-Llama-Bagel-8B/3a63b21d-0aaa-45d5-ae12-6d6c9777edbe.json deleted file mode 100644 index 3a01dfedda77005045a2ab788c1c2916877cdb41..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/theprint_Code-Llama-Bagel-8B/3a63b21d-0aaa-45d5-ae12-6d6c9777edbe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_Code-Llama-Bagel-8B/1762652580.561388", - "retrieved_timestamp": "1762652580.5613928", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/Code-Llama-Bagel-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "theprint/Code-Llama-Bagel-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2529676813078188 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46974200049001086 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.3679791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28216422872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/theprint_Llama-3.2-3B-VanRossum/78e423de-2f66-4c53-8d07-8401802973ca.json b/leaderboard_data/HFOpenLLMv2/meta/theprint_Llama-3.2-3B-VanRossum/78e423de-2f66-4c53-8d07-8401802973ca.json deleted file mode 100644 index 10af06d618eb7295fe9c8c88ec9e8d5b9bda5f68..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/theprint_Llama-3.2-3B-VanRossum/78e423de-2f66-4c53-8d07-8401802973ca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_Llama-3.2-3B-VanRossum/1762652580.562204", - "retrieved_timestamp": "1762652580.562206", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/Llama-3.2-3B-VanRossum", - "developer": "meta", - "inference_platform": "unknown", - "id": "theprint/Llama-3.2-3B-VanRossum" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4782820693537591 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42787418229776697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3441666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27701130319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 3.696 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/theprint_ReWiz-Llama-3.1-8B-v2/e57e6483-7e4c-4a64-8c58-890aafb38f37.json 
b/leaderboard_data/HFOpenLLMv2/meta/theprint_ReWiz-Llama-3.1-8B-v2/e57e6483-7e4c-4a64-8c58-890aafb38f37.json deleted file mode 100644 index 0466ea2e7e5a69bedc070e7c7b86c6f225634240..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/theprint_ReWiz-Llama-3.1-8B-v2/e57e6483-7e4c-4a64-8c58-890aafb38f37.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Llama-3.1-8B-v2/1762652580.5627892", - "retrieved_timestamp": "1762652580.56279", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/ReWiz-Llama-3.1-8B-v2", - "developer": "meta", - "inference_platform": "unknown", - "id": "theprint/ReWiz-Llama-3.1-8B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23790542427425895 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46324275457450953 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.381375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3310339095744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 9.3 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/theprint_ReWiz-Llama-3.2-3B/17d4fced-6a93-4e5e-8349-25dae16596f8.json b/leaderboard_data/HFOpenLLMv2/meta/theprint_ReWiz-Llama-3.2-3B/17d4fced-6a93-4e5e-8349-25dae16596f8.json deleted file mode 100644 index 114ee75b2dd46d401cb4823be2214378a0dfa432..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/theprint_ReWiz-Llama-3.2-3B/17d4fced-6a93-4e5e-8349-25dae16596f8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Llama-3.2-3B/1762652580.5630422", - "retrieved_timestamp": "1762652580.563043", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/ReWiz-Llama-3.2-3B", - "developer": "meta", - "inference_platform": "unknown", - "id": "theprint/ReWiz-Llama-3.2-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4648931501748693 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343257577815292 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1095166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.361375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28873005319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/togethercomputer_LLaMA-2-7B-32K/29dae40d-4786-4fbc-92fa-3415b0c35488.json b/leaderboard_data/HFOpenLLMv2/meta/togethercomputer_LLaMA-2-7B-32K/29dae40d-4786-4fbc-92fa-3415b0c35488.json deleted file mode 100644 index 95cb485e684609b0f94571779c0e00c3f6d893bf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/togethercomputer_LLaMA-2-7B-32K/29dae40d-4786-4fbc-92fa-3415b0c35488.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/togethercomputer_LLaMA-2-7B-32K/1762652580.574694", - "retrieved_timestamp": "1762652580.5746949", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "togethercomputer/LLaMA-2-7B-32K", - "developer": "meta", - "inference_platform": "unknown", - "id": "togethercomputer/LLaMA-2-7B-32K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.18649738250065384 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33995175217301715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17677859042553193 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/trthminh1112_autotrain-llama32-1b-finetune/cad93026-baf2-47ef-a554-4d0ba0d5a946.json b/leaderboard_data/HFOpenLLMv2/meta/trthminh1112_autotrain-llama32-1b-finetune/cad93026-baf2-47ef-a554-4d0ba0d5a946.json deleted file mode 100644 index 72db8d5cec86773178d1039bd55f29054c2d442b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/trthminh1112_autotrain-llama32-1b-finetune/cad93026-baf2-47ef-a554-4d0ba0d5a946.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/trthminh1112_autotrain-llama32-1b-finetune/1762652580.577601", - "retrieved_timestamp": "1762652580.5776021", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "trthminh1112/autotrain-llama32-1b-finetune", - "developer": "meta", - "inference_platform": "unknown", - "id": "trthminh1112/autotrain-llama32-1b-finetune" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17685518867715438 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29956269409410674 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35127083333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10987367021276596 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/uukuguy_speechless-codellama-34b-v2.0/ddcf1dc2-5281-4d14-b870-7ed2fa44c8d0.json b/leaderboard_data/HFOpenLLMv2/meta/uukuguy_speechless-codellama-34b-v2.0/ddcf1dc2-5281-4d14-b870-7ed2fa44c8d0.json deleted file mode 100644 index 081b9407ec65b2a3f24630188c609a36c2a5f781..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/uukuguy_speechless-codellama-34b-v2.0/ddcf1dc2-5281-4d14-b870-7ed2fa44c8d0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/uukuguy_speechless-codellama-34b-v2.0/1762652580.5824919", - "retrieved_timestamp": "1762652580.5824928", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "uukuguy/speechless-codellama-34b-v2.0", - "developer": "meta", - "inference_platform": "unknown", - "id": "uukuguy/speechless-codellama-34b-v2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46042168113937687 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4813126697444618 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2692953020134229 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37870833333333337 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25423869680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/uukuguy_speechless-llama2-hermes-orca-platypus-wizardlm-13b/e9556ee4-63e8-4e0b-88df-62cc6c62c65a.json b/leaderboard_data/HFOpenLLMv2/meta/uukuguy_speechless-llama2-hermes-orca-platypus-wizardlm-13b/e9556ee4-63e8-4e0b-88df-62cc6c62c65a.json deleted file mode 100644 index 04c80bdd27e5128bb3d633bc451e1a8b5fbd39f4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/uukuguy_speechless-llama2-hermes-orca-platypus-wizardlm-13b/e9556ee4-63e8-4e0b-88df-62cc6c62c65a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/uukuguy_speechless-llama2-hermes-orca-platypus-wizardlm-13b/1762652580.5833302", - "retrieved_timestamp": "1762652580.583331", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b", - "developer": "meta", - "inference_platform": "unknown", - "id": "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45617517076911485 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48455373040676664 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4655 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25590093085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/meta/vhab10_llama-3-8b-merged-linear/deed0e49-b9fd-4623-bb90-3e885bec9bb0.json b/leaderboard_data/HFOpenLLMv2/meta/vhab10_llama-3-8b-merged-linear/deed0e49-b9fd-4623-bb90-3e885bec9bb0.json deleted file mode 100644 index 3b4af54541369b07874252c7b7931c19159b1cfb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/vhab10_llama-3-8b-merged-linear/deed0e49-b9fd-4623-bb90-3e885bec9bb0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vhab10_llama-3-8b-merged-linear/1762652580.5860548", - "retrieved_timestamp": "1762652580.5860548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vhab10/llama-3-8b-merged-linear", - "developer": "meta", - "inference_platform": "unknown", - "id": "vhab10/llama-3-8b-merged-linear" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5916634529714491 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49370937443498536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08157099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4190520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37042885638297873 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.65 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/vicgalle_Configurable-Hermes-2-Pro-Llama-3-8B/469379ff-5526-44f4-be9b-8bf6185b917e.json b/leaderboard_data/HFOpenLLMv2/meta/vicgalle_Configurable-Hermes-2-Pro-Llama-3-8B/469379ff-5526-44f4-be9b-8bf6185b917e.json deleted file mode 100644 index 25522f83d4d95bbc2bdbf027323c8f439eb75d1d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/vicgalle_Configurable-Hermes-2-Pro-Llama-3-8B/469379ff-5526-44f4-be9b-8bf6185b917e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/vicgalle_Configurable-Hermes-2-Pro-Llama-3-8B/1762652580.5867279", - "retrieved_timestamp": "1762652580.586729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5762510139762497 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5054841203275775 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4183645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3097573138297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.031 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/vicgalle_Humanish-RP-Llama-3.1-8B/3b0e49aa-931b-4625-8e59-fed02b31372e.json b/leaderboard_data/HFOpenLLMv2/meta/vicgalle_Humanish-RP-Llama-3.1-8B/3b0e49aa-931b-4625-8e59-fed02b31372e.json deleted file mode 100644 index 857f373ed24420e93e6f1117ed2899cdf300c459..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/vicgalle_Humanish-RP-Llama-3.1-8B/3b0e49aa-931b-4625-8e59-fed02b31372e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vicgalle_Humanish-RP-Llama-3.1-8B/1762652580.587956", - "retrieved_timestamp": "1762652580.587957", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vicgalle/Humanish-RP-Llama-3.1-8B", - "developer": "meta", - 
"inference_platform": "unknown", - "id": "vicgalle/Humanish-RP-Llama-3.1-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6669259786256023 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5100385476143247 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39520833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34765625 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/vicgalle_Roleplay-Llama-3-8B/89bafcc1-b175-45ec-b365-45938c1e8f33.json b/leaderboard_data/HFOpenLLMv2/meta/vicgalle_Roleplay-Llama-3-8B/89bafcc1-b175-45ec-b365-45938c1e8f33.json deleted file mode 100644 index bd7b35765c1748a4fd513fc664726c7d59c7bd4b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/vicgalle_Roleplay-Llama-3-8B/89bafcc1-b175-45ec-b365-45938c1e8f33.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vicgalle_Roleplay-Llama-3-8B/1762652580.5885959", - "retrieved_timestamp": "1762652580.588597", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vicgalle/Roleplay-Llama-3-8B", - "developer": "meta", - "inference_platform": "unknown", - "id": "vicgalle/Roleplay-Llama-3-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7320221456845614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5012318206922323 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667674 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3528854166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.370844414893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/viettelsecurity-ai_security-llama3.2-3b/2176e0d8-e0a5-4118-b15f-b272dc643d89.json b/leaderboard_data/HFOpenLLMv2/meta/viettelsecurity-ai_security-llama3.2-3b/2176e0d8-e0a5-4118-b15f-b272dc643d89.json deleted file mode 100644 index a41d7ecfaf4cc37271c9eef7655b296879a13c93..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/viettelsecurity-ai_security-llama3.2-3b/2176e0d8-e0a5-4118-b15f-b272dc643d89.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/viettelsecurity-ai_security-llama3.2-3b/1762652580.588792", - "retrieved_timestamp": "1762652580.588792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "viettelsecurity-ai/security-llama3.2-3b", - "developer": "meta", - "inference_platform": "unknown", - "id": "viettelsecurity-ai/security-llama3.2-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5908888416069362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44005776161052806 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33790625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2837433510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/winglian_Llama-3-8b-64k-PoSE/76bbd348-21b9-4253-8085-d8c4eb0932f6.json b/leaderboard_data/HFOpenLLMv2/meta/winglian_Llama-3-8b-64k-PoSE/76bbd348-21b9-4253-8085-d8c4eb0932f6.json deleted file mode 100644 index ef5cd48c593568f793353b6ddfe06c7d48b539e7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/winglian_Llama-3-8b-64k-PoSE/76bbd348-21b9-4253-8085-d8c4eb0932f6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/winglian_Llama-3-8b-64k-PoSE/1762652580.595902", - "retrieved_timestamp": "1762652580.595903", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "winglian/Llama-3-8b-64k-PoSE", - "developer": "meta", - "inference_platform": "unknown", - "id": "winglian/Llama-3-8b-64k-PoSE" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28569085581811815 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37021796005121793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33955208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2466755319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No 
newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/winglian_llama-3-8b-256k-PoSE/5077856e-f85c-4395-8be9-e3e9bf3655cb.json b/leaderboard_data/HFOpenLLMv2/meta/winglian_llama-3-8b-256k-PoSE/5077856e-f85c-4395-8be9-e3e9bf3655cb.json deleted file mode 100644 index d895fdc44920269f953efc8075c08a57405406e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/winglian_llama-3-8b-256k-PoSE/5077856e-f85c-4395-8be9-e3e9bf3655cb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/winglian_llama-3-8b-256k-PoSE/1762652580.5961442", - "retrieved_timestamp": "1762652580.596145", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "winglian/llama-3-8b-256k-PoSE", - "developer": "meta", - "inference_platform": "unknown", - "id": "winglian/llama-3-8b-256k-PoSE" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2909114482905358 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3156583397739859 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33155208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1116190159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/xinchen9_Llama3.1_8B_Instruct_CoT/eddb5bfc-d5ae-44bc-8ffd-b1d318b0e3d2.json b/leaderboard_data/HFOpenLLMv2/meta/xinchen9_Llama3.1_8B_Instruct_CoT/eddb5bfc-d5ae-44bc-8ffd-b1d318b0e3d2.json deleted file mode 100644 index 72b59a84833913da550ad4f089f3f8ff0e426e4a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/xinchen9_Llama3.1_8B_Instruct_CoT/eddb5bfc-d5ae-44bc-8ffd-b1d318b0e3d2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/xinchen9_Llama3.1_8B_Instruct_CoT/1762652580.5972009", - "retrieved_timestamp": "1762652580.5972018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xinchen9/Llama3.1_8B_Instruct_CoT", - "developer": "meta", - "inference_platform": "unknown", - "id": "xinchen9/Llama3.1_8B_Instruct_CoT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2973565694579272 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4398206147249642 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43706249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2878989361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/xinchen9_Llama3.1_CoT/4ccfc9fe-c222-490e-badd-bfeecc9ede91.json b/leaderboard_data/HFOpenLLMv2/meta/xinchen9_Llama3.1_CoT/4ccfc9fe-c222-490e-badd-bfeecc9ede91.json deleted file mode 100644 index 6f77acd2e35623b3e2d34cb099804d0f9611fc97..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/xinchen9_Llama3.1_CoT/4ccfc9fe-c222-490e-badd-bfeecc9ede91.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xinchen9_Llama3.1_CoT/1762652580.597471", - "retrieved_timestamp": "1762652580.597472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xinchen9/Llama3.1_CoT", - "developer": "meta", - "inference_platform": "unknown", - "id": "xinchen9/Llama3.1_CoT" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22461624046419057 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43410143664277245 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43045833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2738530585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/xinchen9_Llama3.1_CoT_V1/501bff5b-2809-4af7-9600-d6471167b701.json b/leaderboard_data/HFOpenLLMv2/meta/xinchen9_Llama3.1_CoT_V1/501bff5b-2809-4af7-9600-d6471167b701.json deleted file mode 100644 index 9c662cccea1a5585d6c1d4fde3410850c6119fed..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/xinchen9_Llama3.1_CoT_V1/501bff5b-2809-4af7-9600-d6471167b701.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xinchen9_Llama3.1_CoT_V1/1762652580.597682", - "retrieved_timestamp": "1762652580.597683", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xinchen9/Llama3.1_CoT_V1", - "developer": "meta", - "inference_platform": "unknown", - "id": "xinchen9/Llama3.1_CoT_V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2452991396162183 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4376001847280673 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45721875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2805019946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/yuvraj17_Llama3-8B-SuperNova-Spectrum-dare_ties/2bde390d-b448-4ac2-addd-215d722aa66b.json b/leaderboard_data/HFOpenLLMv2/meta/yuvraj17_Llama3-8B-SuperNova-Spectrum-dare_ties/2bde390d-b448-4ac2-addd-215d722aa66b.json deleted file mode 100644 index 8af2ccae390120a2cc53930acda0f07a01017afc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/yuvraj17_Llama3-8B-SuperNova-Spectrum-dare_ties/2bde390d-b448-4ac2-addd-215d722aa66b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yuvraj17_Llama3-8B-SuperNova-Spectrum-dare_ties/1762652580.6118348", - "retrieved_timestamp": "1762652580.6118348", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties", - "developer": "meta", - "inference_platform": "unknown", - "id": "yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4012708502329375 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4615794426716074 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42109375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35738031914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/meta/yuvraj17_Llama3-8B-abliterated-Spectrum-slerp/45cd6db1-064f-45d9-89f2-d931b4f82326.json b/leaderboard_data/HFOpenLLMv2/meta/yuvraj17_Llama3-8B-abliterated-Spectrum-slerp/45cd6db1-064f-45d9-89f2-d931b4f82326.json deleted file mode 100644 index 125e0a919643fb86fd5b6efdd477ae8bcc65a75f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/meta/yuvraj17_Llama3-8B-abliterated-Spectrum-slerp/45cd6db1-064f-45d9-89f2-d931b4f82326.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yuvraj17_Llama3-8B-abliterated-Spectrum-slerp/1762652580.6120949", - "retrieved_timestamp": "1762652580.612096", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yuvraj17/Llama3-8B-abliterated-Spectrum-slerp", - "developer": "meta", - "inference_platform": "unknown", - "id": "yuvraj17/Llama3-8B-abliterated-Spectrum-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2884878788281759 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4977912063897858 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39982291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32571476063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/1024m_PHI-4-Hindi/29f2c6ef-0685-43f9-800b-4f10ddc3ddf7.json b/leaderboard_data/HFOpenLLMv2/microsoft/1024m_PHI-4-Hindi/29f2c6ef-0685-43f9-800b-4f10ddc3ddf7.json deleted file mode 100644 index 442ed60745198504493c06540dbd0698a7a29ef5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/1024m_PHI-4-Hindi/29f2c6ef-0685-43f9-800b-4f10ddc3ddf7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/1024m_PHI-4-Hindi/1762652579.468371", - "retrieved_timestamp": "1762652579.4683719", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "1024m/PHI-4-Hindi", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "1024m/PHI-4-Hindi" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00816832670647216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6710015642760666 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3976510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4913541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.523936170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/BlackBeenie_Neos-Phi-3-14B-v0.1/6d6aa9c5-cb3f-4c30-bd1a-ba951c9ad0e8.json b/leaderboard_data/HFOpenLLMv2/microsoft/BlackBeenie_Neos-Phi-3-14B-v0.1/6d6aa9c5-cb3f-4c30-bd1a-ba951c9ad0e8.json deleted file mode 100644 index 3d9dc4e914c577badfb054c01a2d1dfc0152b3b8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/BlackBeenie_Neos-Phi-3-14B-v0.1/6d6aa9c5-cb3f-4c30-bd1a-ba951c9ad0e8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/BlackBeenie_Neos-Phi-3-14B-v0.1/1762652579.4966102", - "retrieved_timestamp": "1762652579.496611", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "BlackBeenie/Neos-Phi-3-14B-v0.1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "BlackBeenie/Neos-Phi-3-14B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4022449323350931 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6211931530444463 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41254166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45636635638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Daemontatox_Phi-4-COT/4ab23cde-aadb-424d-a88e-e7029a2f5c57.json b/leaderboard_data/HFOpenLLMv2/microsoft/Daemontatox_Phi-4-COT/4ab23cde-aadb-424d-a88e-e7029a2f5c57.json deleted file mode 100644 index d82241df0c70a20ffcc47e083c286d64327f10a0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Daemontatox_Phi-4-COT/4ab23cde-aadb-424d-a88e-e7029a2f5c57.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_Phi-4-COT/1762652579.5296152", - "retrieved_timestamp": "1762652579.5296159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/Phi-4-COT", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Daemontatox/Phi-4-COT" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17930313789633728 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6172933868833469 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2243202416918429 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.453 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.500498670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Daemontatox_SphinX/118ee97a-cc78-4b4d-99c4-58d37b4a48ba.json b/leaderboard_data/HFOpenLLMv2/microsoft/Daemontatox_SphinX/118ee97a-cc78-4b4d-99c4-58d37b4a48ba.json deleted file mode 100644 index b78edbc6ca228b524e21ba98be9a96a854ff7799..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Daemontatox_SphinX/118ee97a-cc78-4b4d-99c4-58d37b4a48ba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_SphinX/1762652579.531104", - "retrieved_timestamp": "1762652579.531104", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/SphinX", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Daemontatox/SphinX" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5725042886208593 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5440583486084486 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3081570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44049999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43658577127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Daemontatox_Sphinx2.0/07d85f99-840b-403a-bace-99712f3469b7.json b/leaderboard_data/HFOpenLLMv2/microsoft/Daemontatox_Sphinx2.0/07d85f99-840b-403a-bace-99712f3469b7.json deleted file mode 100644 index 5994467aeef3b4d87d43744964a907bfec3f301f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Daemontatox_Sphinx2.0/07d85f99-840b-403a-bace-99712f3469b7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_Sphinx2.0/1762652579.531323", - "retrieved_timestamp": "1762652579.531324", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/Sphinx2.0", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Daemontatox/Sphinx2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7123133286346892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.647283976671531 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40181268882175225 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.42603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5183676861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Daemontatox_TinySphinx/6d501ffa-e205-4522-9af5-7036463a5b05.json b/leaderboard_data/HFOpenLLMv2/microsoft/Daemontatox_TinySphinx/6d501ffa-e205-4522-9af5-7036463a5b05.json deleted file mode 100644 index b82c6528239df176c44b5dc1a955baf03906c868..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Daemontatox_TinySphinx/6d501ffa-e205-4522-9af5-7036463a5b05.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_TinySphinx/1762652579.5315351", - "retrieved_timestamp": "1762652579.5315359", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/TinySphinx", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Daemontatox/TinySphinx" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2566900269063862 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33098404240871354 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33276041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1697972074468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Daemontatox_TinySphinx2.0/da5d131c-5ae9-462e-87b1-92ead75eddb9.json 
b/leaderboard_data/HFOpenLLMv2/microsoft/Daemontatox_TinySphinx2.0/da5d131c-5ae9-462e-87b1-92ead75eddb9.json deleted file mode 100644 index b3c9637e91d08806a610222a4ac628908d9c3f01..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Daemontatox_TinySphinx2.0/da5d131c-5ae9-462e-87b1-92ead75eddb9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Daemontatox_TinySphinx2.0/1762652579.531743", - "retrieved_timestamp": "1762652579.531744", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Daemontatox/TinySphinx2.0", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Daemontatox/TinySphinx2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25351733400710114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3168407073661037 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33825 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1731216755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Danielbrdz_Barcenas-14b-Phi-3-medium-ORPO/f9ce1ec0-e727-474b-acb7-1ba49311e355.json b/leaderboard_data/HFOpenLLMv2/microsoft/Danielbrdz_Barcenas-14b-Phi-3-medium-ORPO/f9ce1ec0-e727-474b-acb7-1ba49311e355.json deleted file mode 100644 index da7a6fe5aedd238f500a37cfc73a6771cbe21c3e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Danielbrdz_Barcenas-14b-Phi-3-medium-ORPO/f9ce1ec0-e727-474b-acb7-1ba49311e355.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-14b-Phi-3-medium-ORPO/1762652579.53347", - "retrieved_timestamp": "1762652579.5334709", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4799055395240185 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6536184886648629 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48075 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47232380319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Danielbrdz_Barcenas-14b-phi-4-v2/4180c069-33e8-4109-9d35-dde82549ba26.json b/leaderboard_data/HFOpenLLMv2/microsoft/Danielbrdz_Barcenas-14b-phi-4-v2/4180c069-33e8-4109-9d35-dde82549ba26.json deleted file mode 100644 index 9b0d63d4329165aac5e7da15285e62fdbe3ebc7a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Danielbrdz_Barcenas-14b-phi-4-v2/4180c069-33e8-4109-9d35-dde82549ba26.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-14b-phi-4-v2/1762652579.533969", - "retrieved_timestamp": "1762652579.533969", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Danielbrdz/Barcenas-14b-phi-4-v2", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Danielbrdz/Barcenas-14b-phi-4-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27747266142723526 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6573002324945257 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3217522658610272 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43994791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5243517287234043 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Danielbrdz_Barcenas-14b-phi-4/720029f0-41d5-4161-878e-4218f230455c.json b/leaderboard_data/HFOpenLLMv2/microsoft/Danielbrdz_Barcenas-14b-phi-4/720029f0-41d5-4161-878e-4218f230455c.json deleted file mode 100644 index 0761391091a29d54dd72c3f0c103e8388518c848..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Danielbrdz_Barcenas-14b-phi-4/720029f0-41d5-4161-878e-4218f230455c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-14b-phi-4/1762652579.533744", - "retrieved_timestamp": "1762652579.533744", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Danielbrdz/Barcenas-14b-phi-4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Danielbrdz/Barcenas-14b-phi-4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0497590836757581 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6769303819643072 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2583081570996979 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5096770833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5174534574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/DreadPoor_Morphing-8B-Model_Stock/0fd25475-5202-4cd1-b399-bfb8e113d85b.json b/leaderboard_data/HFOpenLLMv2/microsoft/DreadPoor_Morphing-8B-Model_Stock/0fd25475-5202-4cd1-b399-bfb8e113d85b.json deleted file mode 100644 index 3d283c040772572898a6258211f01a224b2ee4b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/DreadPoor_Morphing-8B-Model_Stock/0fd25475-5202-4cd1-b399-bfb8e113d85b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DreadPoor_Morphing-8B-Model_Stock/1762652579.577464", - "retrieved_timestamp": "1762652579.577465", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DreadPoor/Morphing-8B-Model_Stock", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "DreadPoor/Morphing-8B-Model_Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.744536718130117 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5396942172954088 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4068645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38522273936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/EpistemeAI2_Fireball-Phi-3-medium-4k-inst-Philos/a60477a1-b815-4c82-a9e9-f017cb7b5ec9.json b/leaderboard_data/HFOpenLLMv2/microsoft/EpistemeAI2_Fireball-Phi-3-medium-4k-inst-Philos/a60477a1-b815-4c82-a9e9-f017cb7b5ec9.json deleted file mode 100644 index 62ea445bfae0e66865ad5a40b6bc47213c20a8ca..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/EpistemeAI2_Fireball-Phi-3-medium-4k-inst-Philos/a60477a1-b815-4c82-a9e9-f017cb7b5ec9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Phi-3-medium-4k-inst-Philos/1762652579.612791", - "retrieved_timestamp": "1762652579.612792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5312880933700359 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6177842639287514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41390625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45985704787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 13.96 - } 
-} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/EpistemeAI_DeepThinkers-Phi4/3c97155d-c086-42aa-af12-14316fcf723c.json b/leaderboard_data/HFOpenLLMv2/microsoft/EpistemeAI_DeepThinkers-Phi4/3c97155d-c086-42aa-af12-14316fcf723c.json deleted file mode 100644 index 85eb6157f47709b092c4e7d5aa86978b70920b89..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/EpistemeAI_DeepThinkers-Phi4/3c97155d-c086-42aa-af12-14316fcf723c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EpistemeAI_DeepThinkers-Phi4/1762652579.599432", - "retrieved_timestamp": "1762652579.599433", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/DeepThinkers-Phi4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "EpistemeAI/DeepThinkers-Phi4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6939786433330231 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6790415739665393 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45845921450151056 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3980625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5257646276595744 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/EpistemeAI_Fireball-12B-v1.13a-philosophers/38fae832-3d96-457d-851b-7fcded3f7796.json b/leaderboard_data/HFOpenLLMv2/microsoft/EpistemeAI_Fireball-12B-v1.13a-philosophers/38fae832-3d96-457d-851b-7fcded3f7796.json deleted file mode 100644 index 31e81d8a1d2d433192cd8faa7553682c730cce26..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/EpistemeAI_Fireball-12B-v1.13a-philosophers/38fae832-3d96-457d-851b-7fcded3f7796.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-12B-v1.13a-philosophers/1762652579.60018", - "retrieved_timestamp": "1762652579.600181", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EpistemeAI/Fireball-12B-v1.13a-philosophers", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "EpistemeAI/Fireball-12B-v1.13a-philosophers" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08755324760524298 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5102697700597862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4080729166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3366855053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/FINGU-AI_Phi-4-RRStock/9d85345f-d46b-4431-b5fb-5cca99d92f21.json b/leaderboard_data/HFOpenLLMv2/microsoft/FINGU-AI_Phi-4-RRStock/9d85345f-d46b-4431-b5fb-5cca99d92f21.json deleted file mode 100644 index 78cd7ff3fba4cd89c50e19ed4f94274a7ca6beea..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/FINGU-AI_Phi-4-RRStock/9d85345f-d46b-4431-b5fb-5cca99d92f21.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/FINGU-AI_Phi-4-RRStock/1762652579.616194", - "retrieved_timestamp": "1762652579.616194", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "FINGU-AI/Phi-4-RRStock", - "developer": "microsoft", - "inference_platform": 
"unknown", - "id": "FINGU-AI/Phi-4-RRStock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28554125276488607 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6443442865581455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44794791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48828125 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.652 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/HeraiHench_Phi-4-slerp-ReasoningRP-14B/ca0a3f22-099f-4207-acfe-4b70aa00171e.json b/leaderboard_data/HFOpenLLMv2/microsoft/HeraiHench_Phi-4-slerp-ReasoningRP-14B/ca0a3f22-099f-4207-acfe-4b70aa00171e.json deleted file mode 100644 index 8a5207468b6dce491e95c77f5c5f9a82d4b9f3ce..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/HeraiHench_Phi-4-slerp-ReasoningRP-14B/ca0a3f22-099f-4207-acfe-4b70aa00171e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/HeraiHench_Phi-4-slerp-ReasoningRP-14B/1762652579.639999", - "retrieved_timestamp": "1762652579.64", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "HeraiHench/Phi-4-slerp-ReasoningRP-14B", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "HeraiHench/Phi-4-slerp-ReasoningRP-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15754642127333254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.41957191458446336 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3116145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18999335106382978 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 9.207 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Josephgflowers_Cinder-Phi-2-V1-F16-gguf/4d0a565c-14b2-4ce9-97c0-4d114548fe48.json b/leaderboard_data/HFOpenLLMv2/microsoft/Josephgflowers_Cinder-Phi-2-V1-F16-gguf/4d0a565c-14b2-4ce9-97c0-4d114548fe48.json deleted file mode 100644 index e2754698aeb26ac24d28b74d098084878e02ab5a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Josephgflowers_Cinder-Phi-2-V1-F16-gguf/4d0a565c-14b2-4ce9-97c0-4d114548fe48.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Josephgflowers_Cinder-Phi-2-V1-F16-gguf/1762652579.694953", - "retrieved_timestamp": "1762652579.694954", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Josephgflowers/Cinder-Phi-2-V1-F16-gguf", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Josephgflowers/Cinder-Phi-2-V1-F16-gguf" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23565694579271884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4396616219689493 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34345833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2160904255319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/MaziyarPanahi_calme-2.1-phi3-4b/79b4a850-85b6-45aa-8cc1-5210230a38aa.json b/leaderboard_data/HFOpenLLMv2/microsoft/MaziyarPanahi_calme-2.1-phi3-4b/79b4a850-85b6-45aa-8cc1-5210230a38aa.json deleted file mode 100644 index 8e813b5ab575e755f7e667bae5295fc528401e4e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/MaziyarPanahi_calme-2.1-phi3-4b/79b4a850-85b6-45aa-8cc1-5210230a38aa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-phi3-4b/1762652579.751861", - "retrieved_timestamp": "1762652579.751862", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.1-phi3-4b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.1-phi3-4b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.552520645221346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5595320442699866 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40153124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3745844414893617 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/MaziyarPanahi_calme-2.1-phi3.5-4b/69433e39-158a-46df-a987-ac2a6b3af2af.json b/leaderboard_data/HFOpenLLMv2/microsoft/MaziyarPanahi_calme-2.1-phi3.5-4b/69433e39-158a-46df-a987-ac2a6b3af2af.json deleted file mode 100644 index 13acc94bf5c60cc7795d017a5652ab01a47569e1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/MaziyarPanahi_calme-2.1-phi3.5-4b/69433e39-158a-46df-a987-ac2a6b3af2af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-phi3.5-4b/1762652579.752121", - "retrieved_timestamp": "1762652579.7521222", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.1-phi3.5-4b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.1-phi3.5-4b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5659095644002359 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5483695590203843 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3994583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3935339095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/MaziyarPanahi_calme-2.2-phi3-4b/56593987-babd-4a30-9a20-f83e7d233809.json b/leaderboard_data/HFOpenLLMv2/microsoft/MaziyarPanahi_calme-2.2-phi3-4b/56593987-babd-4a30-9a20-f83e7d233809.json deleted file mode 100644 index 45e505aaf93141a43e9da9455e838106d0aa8a19..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/microsoft/MaziyarPanahi_calme-2.2-phi3-4b/56593987-babd-4a30-9a20-f83e7d233809.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-phi3-4b/1762652579.7536151", - "retrieved_timestamp": "1762652579.7536159", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.2-phi3-4b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.2-phi3-4b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5069083365470286 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5529604896487258 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14501510574018128 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3975625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3813996010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/MaziyarPanahi_calme-2.3-phi3-4b/99b96f53-5ac6-4001-abc6-2a4e43f09028.json b/leaderboard_data/HFOpenLLMv2/microsoft/MaziyarPanahi_calme-2.3-phi3-4b/99b96f53-5ac6-4001-abc6-2a4e43f09028.json deleted file mode 100644 index b321d69d5684b6296aa2c0db1030eabf62ca58ee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/MaziyarPanahi_calme-2.3-phi3-4b/99b96f53-5ac6-4001-abc6-2a4e43f09028.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-phi3-4b/1762652579.755463", - "retrieved_timestamp": "1762652579.755465", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "MaziyarPanahi/calme-2.3-phi3-4b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "MaziyarPanahi/calme-2.3-phi3-4b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49264507063480456 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5537867816134527 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1472809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3988333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3828125 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/NikolaSigmoid_phi-4-14b/cae2d4a1-4632-420f-be40-594f4c001d4d.json b/leaderboard_data/HFOpenLLMv2/microsoft/NikolaSigmoid_phi-4-14b/cae2d4a1-4632-420f-be40-594f4c001d4d.json deleted file mode 100644 index e660dd0bad92d73fefaf22e0eb98b6f3e810bbfa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/NikolaSigmoid_phi-4-14b/cae2d4a1-4632-420f-be40-594f4c001d4d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NikolaSigmoid_phi-4-14b/1762652579.784184", - "retrieved_timestamp": "1762652579.7841852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NikolaSigmoid/phi-4-14b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "NikolaSigmoid/phi-4-14b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05607898154674043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.669500080799667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2938066465256798 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4035234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5046875000000001 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.527842420212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "", - "params_billions": 14.704 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/NikolaSigmoid_phi-4-1steps/a4763c48-f2ab-4f3e-bc1f-a7f4a9f33cf8.json b/leaderboard_data/HFOpenLLMv2/microsoft/NikolaSigmoid_phi-4-1steps/a4763c48-f2ab-4f3e-bc1f-a7f4a9f33cf8.json deleted file mode 100644 index 558f62c35783337f910d8a4976e2fe7fd3a3227c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/NikolaSigmoid_phi-4-1steps/a4763c48-f2ab-4f3e-bc1f-a7f4a9f33cf8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NikolaSigmoid_phi-4-1steps/1762652579.784436", - "retrieved_timestamp": "1762652579.784437", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NikolaSigmoid/phi-4-1steps", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "NikolaSigmoid/phi-4-1steps" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05275668559422333 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6707359457278651 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2983383685800604 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.40184563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5020520833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.52734375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "", - "params_billions": 14.704 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/NikolaSigmoid_phi-4-300steps/e54de9df-52e5-43d2-92c3-9d5207c0e335.json b/leaderboard_data/HFOpenLLMv2/microsoft/NikolaSigmoid_phi-4-300steps/e54de9df-52e5-43d2-92c3-9d5207c0e335.json deleted file mode 100644 index ba1ea3e00694410629e3fda8cf277ef65d3f4ee8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/NikolaSigmoid_phi-4-300steps/e54de9df-52e5-43d2-92c3-9d5207c0e335.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NikolaSigmoid_phi-4-300steps/1762652579.784649", - "retrieved_timestamp": "1762652579.78465", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NikolaSigmoid/phi-4-300steps", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "NikolaSigmoid/phi-4-300steps" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05607898154674043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6701123802649077 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4052013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5287566489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "", - "params_billions": 14.704 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Novaciano_Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/582f87ef-50c5-4a5b-9d76-bc71f97bd2fb.json b/leaderboard_data/HFOpenLLMv2/microsoft/Novaciano_Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/582f87ef-50c5-4a5b-9d76-bc71f97bd2fb.json deleted file mode 100644 index c5f5a36839fa4ff8d32096884ed1661d14e48b9b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Novaciano_Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/582f87ef-50c5-4a5b-9d76-bc71f97bd2fb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Novaciano_Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP/1762652579.7955709", - "retrieved_timestamp": "1762652579.795572", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5342856952885011 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35023897852759145 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3183125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1823470744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/NyxKrage_Microsoft_Phi-4/46494bad-fb41-4fa3-b568-be4e6a22ae5b.json b/leaderboard_data/HFOpenLLMv2/microsoft/NyxKrage_Microsoft_Phi-4/46494bad-fb41-4fa3-b568-be4e6a22ae5b.json deleted file mode 100644 index 5310bf94b1183b7b583be9f66d11f5f2226d22ab..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/microsoft/NyxKrage_Microsoft_Phi-4/46494bad-fb41-4fa3-b568-be4e6a22ae5b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NyxKrage_Microsoft_Phi-4/1762652579.7969122", - "retrieved_timestamp": "1762652579.796913", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NyxKrage/Microsoft_Phi-4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "NyxKrage/Microsoft_Phi-4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0585269307659233 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6690562305322874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2990936555891239 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40604026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5286735372340425 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Orion-zhen_phi-4-abliterated/3970f988-26f6-4810-839a-e5f4fcd6618a.json b/leaderboard_data/HFOpenLLMv2/microsoft/Orion-zhen_phi-4-abliterated/3970f988-26f6-4810-839a-e5f4fcd6618a.json deleted file mode 100644 index bdb7600f458c37eaf741090eebd6fc6d20a91954..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Orion-zhen_phi-4-abliterated/3970f988-26f6-4810-839a-e5f4fcd6618a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Orion-zhen_phi-4-abliterated/1762652579.808864", - "retrieved_timestamp": "1762652579.808865", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Orion-zhen/phi-4-abliterated", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Orion-zhen/phi-4-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05760271634817839 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6698239306664778 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3021148036253776 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40436241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.500625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5291722074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_CoT_Phi/ed579ba1-fcd3-4279-ac93-d0340a771e43.json b/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_CoT_Phi/ed579ba1-fcd3-4279-ac93-d0340a771e43.json deleted file mode 100644 index 08c7fd90266dd8eb18d0df90f1daeb9cbde8ee9b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_CoT_Phi/ed579ba1-fcd3-4279-ac93-d0340a771e43.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_CoT_Phi/1762652579.820767", - "retrieved_timestamp": "1762652579.820768", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/CoT_Phi", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/CoT_Phi" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6158681188136367 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.6750841958594904 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33081570996978854 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42435416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4901097074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_Lo-Phi-14b/b37d3d27-5ba0-44d6-bd19-1196a98b75b4.json b/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_Lo-Phi-14b/b37d3d27-5ba0-44d6-bd19-1196a98b75b4.json deleted file mode 100644 index c2e5e82d8de670f4a0e3731e6fe430e62b25f3da..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_Lo-Phi-14b/b37d3d27-5ba0-44d6-bd19-1196a98b75b4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Lo-Phi-14b/1762652579.825307", - "retrieved_timestamp": "1762652579.8253078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Lo-Phi-14b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/Lo-Phi-14b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4941189377518318 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6851928144814953 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42323958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5369015957446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_Math_Phi4_Reason/1c2a87ca-9f1a-4d32-b1da-743927b722b0.json b/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_Math_Phi4_Reason/1c2a87ca-9f1a-4d32-b1da-743927b722b0.json deleted file mode 100644 index edd469896b7ccd235d0f58a2bbd0e721cb05d783..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_Math_Phi4_Reason/1c2a87ca-9f1a-4d32-b1da-743927b722b0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Math_Phi4_Reason/1762652579.826147", - "retrieved_timestamp": "1762652579.826147", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Math_Phi4_Reason", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/Math_Phi4_Reason" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3220111526305758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6240212275403677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32779456193353473 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4034270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5029920212765957 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} 
\ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_Phi4.Turn.R1Distill.16bit/44749932-f3e3-45ad-bb4b-135a6d656e3b.json b/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_Phi4.Turn.R1Distill.16bit/44749932-f3e3-45ad-bb4b-135a6d656e3b.json deleted file mode 100644 index 3b86c360299226eabc777e7997e01042a3fa1a03..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_Phi4.Turn.R1Distill.16bit/44749932-f3e3-45ad-bb4b-135a6d656e3b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Phi4.Turn.R1Distill.16bit/1762652579.8283992", - "retrieved_timestamp": "1762652579.8283992", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Phi4.Turn.R1Distill.16bit", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/Phi4.Turn.R1Distill.16bit" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31264378515671754 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6563340892011863 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39021875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5256815159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_Phi4.Turn.R1Distill_v1.5.1-Tensors/5f1b91c8-28d0-4274-8979-32416003fafb.json b/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_Phi4.Turn.R1Distill_v1.5.1-Tensors/5f1b91c8-28d0-4274-8979-32416003fafb.json deleted file mode 100644 index 32bc5ce18f8f05f433b6a9f8ef7675d321489fc8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_Phi4.Turn.R1Distill_v1.5.1-Tensors/5f1b91c8-28d0-4274-8979-32416003fafb.json +++ /dev/null @@ -1,107 +0,0 @@ 
-{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Phi4.Turn.R1Distill_v1.5.1-Tensors/1762652579.8286002", - "retrieved_timestamp": "1762652579.8286011", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2995296923274689 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.645570250166195 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39285416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.51171875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_Phi4Basis-14B-sce/d101111a-31bd-4eec-9a53-52543f6d5fd5.json b/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_Phi4Basis-14B-sce/d101111a-31bd-4eec-9a53-52543f6d5fd5.json deleted file mode 100644 index 3abedb59ae0ad3ec635e4f8222116ec61b0cc4da..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_Phi4Basis-14B-sce/d101111a-31bd-4eec-9a53-52543f6d5fd5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_Phi4Basis-14B-sce/1762652579.828811", - "retrieved_timestamp": "1762652579.8288121", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/Phi4Basis-14B-sce", - 
"developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/Phi4Basis-14B-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6501648958097848 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6909074263536413 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4788519637462236 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5389793882978723 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_ThinkPhi1.1-Tensors/056e62d9-ab3e-4bf3-8693-47a5aea7f84f.json b/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_ThinkPhi1.1-Tensors/056e62d9-ab3e-4bf3-8693-47a5aea7f84f.json deleted file mode 100644 index 2845e6290be5ffae1feb8e2e3e69cd6cdcf35fc6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_ThinkPhi1.1-Tensors/056e62d9-ab3e-4bf3-8693-47a5aea7f84f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_ThinkPhi1.1-Tensors/1762652579.831269", - "retrieved_timestamp": "1762652579.831269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/ThinkPhi1.1-Tensors", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/ThinkPhi1.1-Tensors" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3907543096761038 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.6449416604455037 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.418 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4907746010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_graphite-14b-sce/bd98b886-a899-4022-aee4-09ea0e491fe3.json b/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_graphite-14b-sce/bd98b886-a899-4022-aee4-09ea0e491fe3.json deleted file mode 100644 index 810b623893dc2e1e1a75b97e9f7fec0227c62fc1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Quazim0t0_graphite-14b-sce/bd98b886-a899-4022-aee4-09ea0e491fe3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Quazim0t0_graphite-14b-sce/1762652579.833386", - "retrieved_timestamp": "1762652579.833387", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Quazim0t0/graphite-14b-sce", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Quazim0t0/graphite-14b-sce" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3216864585965239 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6631420093244736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30060422960725075 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.398125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5280086436170213 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Sakalti_Phi3.5-Comets-3.8B/7d9a3955-232c-4a93-b879-bd065bab4768.json b/leaderboard_data/HFOpenLLMv2/microsoft/Sakalti_Phi3.5-Comets-3.8B/7d9a3955-232c-4a93-b879-bd065bab4768.json deleted file mode 100644 index eeb62ffe562c65dbce5808329f9da67ae1fe8d28..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Sakalti_Phi3.5-Comets-3.8B/7d9a3955-232c-4a93-b879-bd065bab4768.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sakalti_Phi3.5-Comets-3.8B/1762652579.858093", - "retrieved_timestamp": "1762652579.858093", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sakalti/Phi3.5-Comets-3.8B", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Sakalti/Phi3.5-Comets-3.8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20942876013422163 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3335116874180515 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3763541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11527593085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file 
diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/SicariusSicariiStuff_Phi-Line_14B/12b2a13d-2b38-47e6-a6d2-3d5a30bff5ae.json b/leaderboard_data/HFOpenLLMv2/microsoft/SicariusSicariiStuff_Phi-Line_14B/12b2a13d-2b38-47e6-a6d2-3d5a30bff5ae.json deleted file mode 100644 index 2b3d07eb1a83a4975936fec6e2511eeeeaf31168..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/SicariusSicariiStuff_Phi-Line_14B/12b2a13d-2b38-47e6-a6d2-3d5a30bff5ae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Phi-Line_14B/1762652579.8832798", - "retrieved_timestamp": "1762652579.8832798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/Phi-Line_14B", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Phi-Line_14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6495653754260917 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6154430096216078 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859516616314199 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44785416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5453789893617021 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/SicariusSicariiStuff_Phi-lthy4/56fa06dd-fd07-4613-9ac5-81c739cb6a64.json b/leaderboard_data/HFOpenLLMv2/microsoft/SicariusSicariiStuff_Phi-lthy4/56fa06dd-fd07-4613-9ac5-81c739cb6a64.json deleted file mode 100644 index 085a1290629fae4c4e1d9bf3a28045760887e329..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/SicariusSicariiStuff_Phi-lthy4/56fa06dd-fd07-4613-9ac5-81c739cb6a64.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/SicariusSicariiStuff_Phi-lthy4/1762652579.883529", - "retrieved_timestamp": "1762652579.88353", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "SicariusSicariiStuff/Phi-lthy4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "SicariusSicariiStuff/Phi-lthy4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7679423928509688 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.587935701572946 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40829166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433344414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 11.933 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Triangle104_Phi-4-AbliteratedRP/ef628438-c2ff-4939-8bf1-09f1df25fd15.json b/leaderboard_data/HFOpenLLMv2/microsoft/Triangle104_Phi-4-AbliteratedRP/ef628438-c2ff-4939-8bf1-09f1df25fd15.json deleted file mode 100644 index 9a7abc1fdfc32e3144707bb84b39906d0192d24a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Triangle104_Phi-4-AbliteratedRP/ef628438-c2ff-4939-8bf1-09f1df25fd15.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Phi-4-AbliteratedRP/1762652579.931047", - "retrieved_timestamp": "1762652579.931048", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Phi-4-AbliteratedRP", - "developer": "microsoft", - "inference_platform": "unknown", - "id": 
"Triangle104/Phi-4-AbliteratedRP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49227050891634194 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6708776140201277 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3074018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5098333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.530751329787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Triangle104_Phi4-RP-o1-Ablit/c3578998-b9dc-4b42-a8cb-0bdf05cffc9f.json b/leaderboard_data/HFOpenLLMv2/microsoft/Triangle104_Phi4-RP-o1-Ablit/c3578998-b9dc-4b42-a8cb-0bdf05cffc9f.json deleted file mode 100644 index 1feb7d2ea8dc89cb0a39c5147e96168cdd6555cb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Triangle104_Phi4-RP-o1-Ablit/c3578998-b9dc-4b42-a8cb-0bdf05cffc9f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Phi4-RP-o1-Ablit/1762652579.93156", - "retrieved_timestamp": "1762652579.93156", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Phi4-RP-o1-Ablit", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Triangle104/Phi4-RP-o1-Ablit" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02385559205131274 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6629825730619672 - } - }, - { - "evaluation_name": 
"MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47541666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5104720744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Triangle104_Phi4-RP-o1/9ed49666-aee1-43d0-8c7c-98c178860f0c.json b/leaderboard_data/HFOpenLLMv2/microsoft/Triangle104_Phi4-RP-o1/9ed49666-aee1-43d0-8c7c-98c178860f0c.json deleted file mode 100644 index 445dc4b35e0f25d11daff1b9071280200ed06d01..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Triangle104_Phi4-RP-o1/9ed49666-aee1-43d0-8c7c-98c178860f0c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Phi4-RP-o1/1762652579.9312892", - "retrieved_timestamp": "1762652579.9312901", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Phi4-RP-o1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Triangle104/Phi4-RP-o1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022007163215822904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6652563961373095 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776435045317221 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4755729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110538563829787 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Undi95_Phi4-abliterated/29c3f781-f49c-4afc-bbc4-a47aebc91f71.json b/leaderboard_data/HFOpenLLMv2/microsoft/Undi95_Phi4-abliterated/29c3f781-f49c-4afc-bbc4-a47aebc91f71.json deleted file mode 100644 index ccbd2bbb3c8450cf808bbeb2f54eb37ec521c8c9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Undi95_Phi4-abliterated/29c3f781-f49c-4afc-bbc4-a47aebc91f71.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Undi95_Phi4-abliterated/1762652579.9391701", - "retrieved_timestamp": "1762652579.939171", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Undi95/Phi4-abliterated", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Undi95/Phi4-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6617552538375954 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.680902103041113 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37009063444108764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4034270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.528091755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/microsoft/VAGOsolutions_SauerkrautLM-Phi-3-medium/ae8b39a7-7fca-441f-bae3-8db76879cefe.json b/leaderboard_data/HFOpenLLMv2/microsoft/VAGOsolutions_SauerkrautLM-Phi-3-medium/ae8b39a7-7fca-441f-bae3-8db76879cefe.json deleted file mode 100644 index 7891795dc3ab6b7738c61534d262d46ab82d9ebc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/VAGOsolutions_SauerkrautLM-Phi-3-medium/ae8b39a7-7fca-441f-bae3-8db76879cefe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Phi-3-medium/1762652579.942282", - "retrieved_timestamp": "1762652579.942282", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "VAGOsolutions/SauerkrautLM-Phi-3-medium", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "VAGOsolutions/SauerkrautLM-Phi-3-medium" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4408879550703245 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6432931765847228 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347315436241611 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4845 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46650598404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Xiaojian9992024_Phi-4-Megatron-Empathetic/aec0af15-927b-48bd-a889-d4715aff4c42.json b/leaderboard_data/HFOpenLLMv2/microsoft/Xiaojian9992024_Phi-4-Megatron-Empathetic/aec0af15-927b-48bd-a889-d4715aff4c42.json deleted file mode 100644 index af4220d3fc7a53247d6e14bff37894e423c8559d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Xiaojian9992024_Phi-4-Megatron-Empathetic/aec0af15-927b-48bd-a889-d4715aff4c42.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Phi-4-Megatron-Empathetic/1762652579.952935", - "retrieved_timestamp": "1762652579.952936", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Xiaojian9992024/Phi-4-Megatron-Empathetic", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Phi-4-Megatron-Empathetic" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01726086783068924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6673396558729835 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26963746223564955 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5071354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5082280585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Xiaojian9992024_Phi-4-mini-UNOFFICAL/058de011-1e80-4a6d-803f-8ba7f927cd7f.json b/leaderboard_data/HFOpenLLMv2/microsoft/Xiaojian9992024_Phi-4-mini-UNOFFICAL/058de011-1e80-4a6d-803f-8ba7f927cd7f.json deleted file mode 100644 index b5902a57e43be6071edc23aa6ca18c66225a6b78..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Xiaojian9992024_Phi-4-mini-UNOFFICAL/058de011-1e80-4a6d-803f-8ba7f927cd7f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Phi-4-mini-UNOFFICAL/1762652579.9531882", - "retrieved_timestamp": "1762652579.9531891", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"Xiaojian9992024/Phi-4-mini-UNOFFICAL", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Xiaojian9992024/Phi-4-mini-UNOFFICAL" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12732106366662677 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29444372790183987 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3368229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11444481382978723 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.754 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/Youlln_3PRYMMAL-PHI3-3B-SLERP/2c53181b-8681-46ad-b739-396b1ecb163c.json b/leaderboard_data/HFOpenLLMv2/microsoft/Youlln_3PRYMMAL-PHI3-3B-SLERP/2c53181b-8681-46ad-b739-396b1ecb163c.json deleted file mode 100644 index 8fcf0636bac8a877fe160bd3940602b886e4be31..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/Youlln_3PRYMMAL-PHI3-3B-SLERP/2c53181b-8681-46ad-b739-396b1ecb163c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Youlln_3PRYMMAL-PHI3-3B-SLERP/1762652579.9609358", - "retrieved_timestamp": "1762652579.960937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Youlln/3PRYMMAL-PHI3-3B-SLERP", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "Youlln/3PRYMMAL-PHI3-3B-SLERP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3655500738041729 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.5421833887682153 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1714501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46484375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4001828457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/abideen_MedPhi-4-14B-v1/0367a9de-960b-4c1d-8e63-8dea06197bfa.json b/leaderboard_data/HFOpenLLMv2/microsoft/abideen_MedPhi-4-14B-v1/0367a9de-960b-4c1d-8e63-8dea06197bfa.json deleted file mode 100644 index eb07f67c4f4eee122438e11c791961b555bfc5b0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/abideen_MedPhi-4-14B-v1/0367a9de-960b-4c1d-8e63-8dea06197bfa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/abideen_MedPhi-4-14B-v1/1762652579.973941", - "retrieved_timestamp": "1762652579.973942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "abideen/MedPhi-4-14B-v1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "abideen/MedPhi-4-14B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6276834355066778 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6896781879584077 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2930513595166163 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4154583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5338264627659575 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/allknowingroger_MistralPhi3-11B/f7f557cf-4c63-444a-8c8f-515796b9b127.json b/leaderboard_data/HFOpenLLMv2/microsoft/allknowingroger_MistralPhi3-11B/f7f557cf-4c63-444a-8c8f-515796b9b127.json deleted file mode 100644 index 788cfa87403292af8dbaa896ba7cbe8bf69da76c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/allknowingroger_MistralPhi3-11B/f7f557cf-4c63-444a-8c8f-515796b9b127.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_MistralPhi3-11B/1762652579.990464", - "retrieved_timestamp": "1762652579.990464", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/MistralPhi3-11B", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "allknowingroger/MistralPhi3-11B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1942911474886634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6234314600705605 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4266770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - 
"params_billions": 11.234 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/allknowingroger_Phi3mash1-17B-pass/83ec9172-5769-4737-a766-0ca2006dd3e4.json b/leaderboard_data/HFOpenLLMv2/microsoft/allknowingroger_Phi3mash1-17B-pass/83ec9172-5769-4737-a766-0ca2006dd3e4.json deleted file mode 100644 index 2ad15e6e78379c2be33e347b0ec726d94bff8b4a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/allknowingroger_Phi3mash1-17B-pass/83ec9172-5769-4737-a766-0ca2006dd3e4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Phi3mash1-17B-pass/1762652579.997936", - "retrieved_timestamp": "1762652579.997937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Phi3mash1-17B-pass", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "allknowingroger/Phi3mash1-17B-pass" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18842116694814204 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6128878795560929 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.445125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45894281914893614 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 16.687 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/allknowingroger_ROGERphi-7B-slerp/9e7ef237-2e59-429d-9784-45de952f60af.json b/leaderboard_data/HFOpenLLMv2/microsoft/allknowingroger_ROGERphi-7B-slerp/9e7ef237-2e59-429d-9784-45de952f60af.json deleted file mode 100644 index ac036384cfec2cad18b097bc8b2a91bce71eb2b1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/allknowingroger_ROGERphi-7B-slerp/9e7ef237-2e59-429d-9784-45de952f60af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_ROGERphi-7B-slerp/1762652580.0022678", - "retrieved_timestamp": "1762652580.002269", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/ROGERphi-7B-slerp", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "allknowingroger/ROGERphi-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3861332375873793 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5195583428468424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46853125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3052692819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/benhaotang_phi4-qwq-sky-t1/08f1ef63-efc7-449c-92cf-6f180b9d2712.json b/leaderboard_data/HFOpenLLMv2/microsoft/benhaotang_phi4-qwq-sky-t1/08f1ef63-efc7-449c-92cf-6f180b9d2712.json deleted file mode 100644 index 3635b0251504420e9f8f382be56ae618e817d2d2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/benhaotang_phi4-qwq-sky-t1/08f1ef63-efc7-449c-92cf-6f180b9d2712.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/benhaotang_phi4-qwq-sky-t1/1762652580.030136", - "retrieved_timestamp": "1762652580.030137", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "benhaotang/phi4-qwq-sky-t1", - "developer": "microsoft", - "inference_platform": "unknown", - 
"id": "benhaotang/phi4-qwq-sky-t1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04596249063595704 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6710520703782934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48995833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5244348404255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-3.5-mini-TitanFusion-0.1/60823e05-59e3-4c4c-a23e-8ef495aa39be.json b/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-3.5-mini-TitanFusion-0.1/60823e05-59e3-4c4c-a23e-8ef495aa39be.json deleted file mode 100644 index 97826a2de73eaa8cb95bbe1dca5051cf40f98f10..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-3.5-mini-TitanFusion-0.1/60823e05-59e3-4c4c-a23e-8ef495aa39be.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-3.5-mini-TitanFusion-0.1/1762652580.04916", - "retrieved_timestamp": "1762652580.049161", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Phi-3.5-mini-TitanFusion-0.1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-3.5-mini-TitanFusion-0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5227950726295119 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.5373733988565133 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4453125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806515957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Model-Stock-v2/5bc6e404-5798-4d19-88d1-5a8153947227.json b/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Model-Stock-v2/5bc6e404-5798-4d19-88d1-5a8153947227.json deleted file mode 100644 index 2f9d6926b844bda0dc55f9c34929aa83e930377e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Model-Stock-v2/5bc6e404-5798-4d19-88d1-5a8153947227.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Model-Stock-v2/1762652580.050115", - "retrieved_timestamp": "1762652580.050116", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Phi-4-Model-Stock-v2", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Model-Stock-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.63752510006782 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6824667320746144 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37537764350453173 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46617708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5330784574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Model-Stock-v3/5832ef9b-bd14-46ba-b04d-049280bc5267.json b/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Model-Stock-v3/5832ef9b-bd14-46ba-b04d-049280bc5267.json deleted file mode 100644 index 54d5a53e02870a12553468785cc4cf7afb32b270..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Model-Stock-v3/5832ef9b-bd14-46ba-b04d-049280bc5267.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Model-Stock-v3/1762652580.050334", - "retrieved_timestamp": "1762652580.050335", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Phi-4-Model-Stock-v3", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Model-Stock-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5911636679565775 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6726298549419627 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4901812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41663541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5381482712765957 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Model-Stock-v4/92363115-37f2-4d2f-8178-61fc98c8f337.json b/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Model-Stock-v4/92363115-37f2-4d2f-8178-61fc98c8f337.json deleted file mode 100644 index 2939f7d43d87ac7780f713aafec6a3f9332123bb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Model-Stock-v4/92363115-37f2-4d2f-8178-61fc98c8f337.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Model-Stock-v4/1762652580.0505521", - "retrieved_timestamp": "1762652580.050553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Phi-4-Model-Stock-v4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Model-Stock-v4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7110145524984818 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6924302574038697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4610625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5393949468085106 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Model-Stock/cee9b876-96b3-4429-af70-6a5b45747a3b.json b/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Model-Stock/cee9b876-96b3-4429-af70-6a5b45747a3b.json deleted file mode 100644 index 49f032f10471c7e625b668b8af47589bc83e5aea..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Model-Stock/cee9b876-96b3-4429-af70-6a5b45747a3b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/bunnycore_Phi-4-Model-Stock/1762652580.0497222", - "retrieved_timestamp": "1762652580.049727", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Phi-4-Model-Stock", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Model-Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6878837041272712 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6889699980822082 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4297583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44413541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5368184840425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-RP-v0/29135c1b-e6a0-428a-ba4f-459e9b652d25.json b/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-RP-v0/29135c1b-e6a0-428a-ba4f-459e9b652d25.json deleted file mode 100644 index 9baaa598cb6aa7a6a938fc43aa0c37aadd9d9292..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-RP-v0/29135c1b-e6a0-428a-ba4f-459e9b652d25.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-RP-v0/1762652580.050766", - "retrieved_timestamp": "1762652580.0507672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Phi-4-RP-v0", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-RP-v0" - }, - "evaluation_results": [ - { 
- "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6827129793392643 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.685633603278299 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33157099697885195 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41409375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5364029255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-RR-Shoup/377bc688-a18e-4abb-91f7-d78a934e1649.json b/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-RR-Shoup/377bc688-a18e-4abb-91f7-d78a934e1649.json deleted file mode 100644 index 0960d2f1be4e4d47ae0bff5062467c6c4f2c9816..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-RR-Shoup/377bc688-a18e-4abb-91f7-d78a934e1649.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-RR-Shoup/1762652580.050983", - "retrieved_timestamp": "1762652580.050983", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Phi-4-RR-Shoup", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-RR-Shoup" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6586579165503088 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6947025970028124 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49924471299093653 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44404166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5428856382978723 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-RStock-v0.1/cf300641-1ec3-4ee7-b38d-b274ebc23ff2.json b/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-RStock-v0.1/cf300641-1ec3-4ee7-b38d-b274ebc23ff2.json deleted file mode 100644 index 86bbe27d3755390a71994b6c0614478d731203b5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-RStock-v0.1/cf300641-1ec3-4ee7-b38d-b274ebc23ff2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-RStock-v0.1/1762652580.051188", - "retrieved_timestamp": "1762652580.051189", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Phi-4-RStock-v0.1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-RStock-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7018721436898541 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6928310064675399 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3950151057401813 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.45836458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5400598404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-ReasoningRP/5db77608-f892-4ac4-93c4-03f177696484.json b/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-ReasoningRP/5db77608-f892-4ac4-93c4-03f177696484.json deleted file mode 100644 index 886724d71fdabc7da9a1973ca5b0d12d4c2dd090..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-ReasoningRP/5db77608-f892-4ac4-93c4-03f177696484.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-ReasoningRP/1762652580.05142", - "retrieved_timestamp": "1762652580.051421", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Phi-4-ReasoningRP", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-ReasoningRP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6736204382150472 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6922187070022994 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4569486404833837 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44909375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5420545212765957 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Sce-exp-v0.1/c8de0acd-7cce-45c0-9032-2b717f3917b8.json 
b/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Sce-exp-v0.1/c8de0acd-7cce-45c0-9032-2b717f3917b8.json deleted file mode 100644 index 1edca2f10bcec72b2e9c4b39b0872162781790fb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Sce-exp-v0.1/c8de0acd-7cce-45c0-9032-2b717f3917b8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Sce-exp-v0.1/1762652580.0516632", - "retrieved_timestamp": "1762652580.0516639", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Phi-4-Sce-exp-v0.1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Sce-exp-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6595322632836429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.694317957938629 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5030211480362538 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44407291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5423038563829787 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Stock-Ex/bc007572-56ff-449a-9e3d-5ab770c3ae44.json b/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Stock-Ex/bc007572-56ff-449a-9e3d-5ab770c3ae44.json deleted file mode 100644 index 25367b08a00e33a9a0a6a962ef0644783e96baed..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Stock-Ex/bc007572-56ff-449a-9e3d-5ab770c3ae44.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Stock-Ex/1762652580.051897", - "retrieved_timestamp": "1762652580.051897", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Phi-4-Stock-Ex", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Stock-Ex" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6574588757829227 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6864461628663387 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4086102719033233 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46236458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5374833776595744 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Stock-RP/69724e46-4038-4d3a-a8ff-e84a56bba9e8.json b/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Stock-RP/69724e46-4038-4d3a-a8ff-e84a56bba9e8.json deleted file mode 100644 index 6690475419cf8e5d1bf8a2e054d511dd2cfd3410..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Stock-RP/69724e46-4038-4d3a-a8ff-e84a56bba9e8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Stock-RP/1762652580.0521228", - "retrieved_timestamp": "1762652580.0521228", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Phi-4-Stock-RP", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Stock-RP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6399231816025922 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6859633715492438 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47147916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316655585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Trim-Exp1/c13c2fd7-e271-4935-a3a6-4161cb8e4ea2.json b/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Trim-Exp1/c13c2fd7-e271-4935-a3a6-4161cb8e4ea2.json deleted file mode 100644 index cf28bcae33959975f6b13e94a50c714d07641c44..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-4-Trim-Exp1/c13c2fd7-e271-4935-a3a6-4161cb8e4ea2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-4-Trim-Exp1/1762652580.052348", - "retrieved_timestamp": "1762652580.052348", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Phi-4-Trim-Exp1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-4-Trim-Exp1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12192538021338936 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28516626650940224 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4176875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.503 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-Seek-4-Sce-V1/75810fb9-99b5-4707-80a8-8974bbb0844d.json b/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-Seek-4-Sce-V1/75810fb9-99b5-4707-80a8-8974bbb0844d.json deleted file mode 100644 index 167e41c3e58e5d5667113a073eb8eb617f703dcd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/bunnycore_Phi-Seek-4-Sce-V1/75810fb9-99b5-4707-80a8-8974bbb0844d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bunnycore_Phi-Seek-4-Sce-V1/1762652580.052572", - "retrieved_timestamp": "1762652580.052573", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bunnycore/Phi-Seek-4-Sce-V1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "bunnycore/Phi-Seek-4-Sce-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29348462080612775 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6459114889718743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39815625 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5123005319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/carsenk_phi3.5_mini_exp_825_uncensored/68315e0a-603c-4784-a567-e342a6185c07.json b/leaderboard_data/HFOpenLLMv2/microsoft/carsenk_phi3.5_mini_exp_825_uncensored/68315e0a-603c-4784-a567-e342a6185c07.json deleted file mode 100644 index 57636370f4d57813656aaa5f4b33ad98208c0b70..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/carsenk_phi3.5_mini_exp_825_uncensored/68315e0a-603c-4784-a567-e342a6185c07.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/carsenk_phi3.5_mini_exp_825_uncensored/1762652580.083884", - "retrieved_timestamp": "1762652580.083887", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "carsenk/phi3.5_mini_exp_825_uncensored", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "carsenk/phi3.5_mini_exp_825_uncensored" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13641360479084386 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29647345147918264 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36441666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11751994680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_Dolphin3.0-R1-Mistral-24B/8a641aee-1604-4910-8164-9e6d5c0652b1.json 
b/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_Dolphin3.0-R1-Mistral-24B/8a641aee-1604-4910-8164-9e6d5c0652b1.json deleted file mode 100644 index a4bc02bcddae39d4d9b5a5acc978bd3cef0abfcc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_Dolphin3.0-R1-Mistral-24B/8a641aee-1604-4910-8164-9e6d5c0652b1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-R1-Mistral-24B/1762652580.112771", - "retrieved_timestamp": "1762652580.112771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.406816136739407 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359697041031141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3119335347432024 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.300531914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.1-yi-1.5-34b/4e6cb7a6-f01d-4e25-be2f-bda77af2eaf6.json b/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.1-yi-1.5-34b/4e6cb7a6-f01d-4e25-be2f-bda77af2eaf6.json deleted file mode 100644 index b210c7df5f14bc34cbe8a128e7fefb84af45cadf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.1-yi-1.5-34b/4e6cb7a6-f01d-4e25-be2f-bda77af2eaf6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/cognitivecomputations_dolphin-2.9.1-yi-1.5-34b/1762652580.113518", - "retrieved_timestamp": "1762652580.1135192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.1-yi-1.5-34b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.1-yi-1.5-34b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3852588908540451 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6076225600626862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1865558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45979166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4518783244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.389 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.1-yi-1.5-9b/e1003371-d503-469d-ae41-e813d097ea43.json b/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.1-yi-1.5-9b/e1003371-d503-469d-ae41-e813d097ea43.json deleted file mode 100644 index 47b19f228bfdad595d7203069a90f88de700b84a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.1-yi-1.5-9b/e1003371-d503-469d-ae41-e813d097ea43.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.1-yi-1.5-9b/1762652580.113816", - "retrieved_timestamp": "1762652580.113816", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"cognitivecomputations/dolphin-2.9.1-yi-1.5-9b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.1-yi-1.5-9b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44653297694561545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5484314644603556 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15181268882175228 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4348020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3966921542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium-abliterated/6f89f55f-a259-419a-b6ad-9b01b2dae9d8.json b/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium-abliterated/6f89f55f-a259-419a-b6ad-9b01b2dae9d8.json deleted file mode 100644 index 8ea810245d58bcb24ce7b27636394d4b44fefafb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium-abliterated/6f89f55f-a259-419a-b6ad-9b01b2dae9d8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium-abliterated/1762652580.1142762", - "retrieved_timestamp": "1762652580.1142762", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.36125369574950017 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.612322545411745 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4111770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4493849734042553 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium-abliterated/958ad3b8-9b65-4165-9d3c-a49e25802fd3.json b/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium-abliterated/958ad3b8-9b65-4165-9d3c-a49e25802fd3.json deleted file mode 100644 index 08339c829107627b575c6c0b91fe728e0a569d9d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium-abliterated/958ad3b8-9b65-4165-9d3c-a49e25802fd3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium-abliterated/1762652580.114508", - "retrieved_timestamp": "1762652580.114509", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4123614232458765 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.638289226729353 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43492708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45246010638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium/36476eb7-a89a-45e1-b423-7755edfd5be1.json b/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium/36476eb7-a89a-45e1-b423-7755edfd5be1.json deleted file mode 100644 index 08d71b3ac94d7e5871abe854bb06191c9b0c4010..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium/36476eb7-a89a-45e1-b423-7755edfd5be1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-Phi-3-Medium/1762652580.114048", - "retrieved_timestamp": "1762652580.114049", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4247762603226107 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6456739302686527 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18277945619335348 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4190520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45553523936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": -1.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.3-Yi-1.5-34B-32k/0e625490-b7b1-4b64-aa1e-222c4e21d7a5.json b/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.3-Yi-1.5-34B-32k/0e625490-b7b1-4b64-aa1e-222c4e21d7a5.json deleted file mode 100644 index 6dd0aa8692049f169b7859e4441fb42ceb06a80f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.3-Yi-1.5-34B-32k/0e625490-b7b1-4b64-aa1e-222c4e21d7a5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.3-Yi-1.5-34B-32k/1762652580.115152", - "retrieved_timestamp": "1762652580.115152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3639266036339136 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6046995537773227 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16691842900302115 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43105208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4630152925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 34.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.3-mistral-7B-32k/4a0bc836-88b7-4d6e-9f0d-321ff75b1733.json b/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.3-mistral-7B-32k/4a0bc836-88b7-4d6e-9f0d-321ff75b1733.json deleted file mode 100644 index fa4e293d0138c71fdfb538ca4ae2029b2bc02ada..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.3-mistral-7B-32k/4a0bc836-88b7-4d6e-9f0d-321ff75b1733.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.3-mistral-7B-32k/1762652580.1153762", - "retrieved_timestamp": "1762652580.115377", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.3-mistral-7B-32k", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.3-mistral-7B-32k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4126362495955177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48125401481062013 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4642604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2820811170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.3-mistral-nemo-12b/05488c6f-dfd4-4481-a3d4-15a918b115d3.json b/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.3-mistral-nemo-12b/05488c6f-dfd4-4481-a3d4-15a918b115d3.json 
deleted file mode 100644 index 9e92badaad3215db6936da3a98ac6101fc07c6dd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/cognitivecomputations_dolphin-2.9.3-mistral-nemo-12b/05488c6f-dfd4-4481-a3d4-15a918b115d3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.3-mistral-nemo-12b/1762652580.115594", - "retrieved_timestamp": "1762652580.115595", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5600894515441251 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5480369183144175 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4429895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3376828457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/ehristoforu_phi-4-25b/d11d7e47-f9e0-4502-9e71-0654819c3cd4.json b/leaderboard_data/HFOpenLLMv2/microsoft/ehristoforu_phi-4-25b/d11d7e47-f9e0-4502-9e71-0654819c3cd4.json deleted file mode 100644 index f7766aef3af9fe11b2703fde3a96c757d4cf3bf1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/ehristoforu_phi-4-25b/d11d7e47-f9e0-4502-9e71-0654819c3cd4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_phi-4-25b/1762652580.144644", - "retrieved_timestamp": "1762652580.1446452", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/phi-4-25b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "ehristoforu/phi-4-25b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6483663346587056 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6907778236877188 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.452416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4207916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5350731382978723 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 24.883 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/ehristoforu_ruphi-4b/70337ca5-7810-4e52-8382-0c2568a6ab70.json b/leaderboard_data/HFOpenLLMv2/microsoft/ehristoforu_ruphi-4b/70337ca5-7810-4e52-8382-0c2568a6ab70.json deleted file mode 100644 index b9bfd1744c2ad9efe501b813c2ae7ef3e59e09b1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/ehristoforu_ruphi-4b/70337ca5-7810-4e52-8382-0c2568a6ab70.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ehristoforu_ruphi-4b/1762652580.1457548", - "retrieved_timestamp": "1762652580.145756", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ehristoforu/ruphi-4b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "ehristoforu/ruphi-4b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.17518185082248433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29060336568338 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35117708333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11261635638297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/fhai50032_Unaligned-Thinker-PHI-4/bda90ce2-cb80-4942-8492-28329d7f5aeb.json b/leaderboard_data/HFOpenLLMv2/microsoft/fhai50032_Unaligned-Thinker-PHI-4/bda90ce2-cb80-4942-8492-28329d7f5aeb.json deleted file mode 100644 index 057694ee4615027b4a1790bf6bebf871c43ee435..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/fhai50032_Unaligned-Thinker-PHI-4/bda90ce2-cb80-4942-8492-28329d7f5aeb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/fhai50032_Unaligned-Thinker-PHI-4/1762652580.154337", - "retrieved_timestamp": "1762652580.1543381", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "fhai50032/Unaligned-Thinker-PHI-4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "fhai50032/Unaligned-Thinker-PHI-4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.056254072527560206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6642576780946753 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33534743202416917 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4678541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5147107712765957 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/hotmailuser_Phi4-Slerp4-14B/da866c81-296f-463c-962b-6b871d6fb633.json b/leaderboard_data/HFOpenLLMv2/microsoft/hotmailuser_Phi4-Slerp4-14B/da866c81-296f-463c-962b-6b871d6fb633.json deleted file mode 100644 index c034de7064cf038e644d49750a870da94b31567d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/hotmailuser_Phi4-Slerp4-14B/da866c81-296f-463c-962b-6b871d6fb633.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_Phi4-Slerp4-14B/1762652580.1958668", - "retrieved_timestamp": "1762652580.195868", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/Phi4-Slerp4-14B", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "hotmailuser/Phi4-Slerp4-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0629485321170051 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6731037909447855 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39681208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5277593085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/magnifi_Phi3_intent_v56_3_w_unknown_5_lr_0.002/c78d1aaf-9975-45d6-9a8d-eed76f7e0a0f.json b/leaderboard_data/HFOpenLLMv2/microsoft/magnifi_Phi3_intent_v56_3_w_unknown_5_lr_0.002/c78d1aaf-9975-45d6-9a8d-eed76f7e0a0f.json deleted file mode 100644 index 357cf506be9d6538b19f8644db15cf23f2ccf392..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/magnifi_Phi3_intent_v56_3_w_unknown_5_lr_0.002/c78d1aaf-9975-45d6-9a8d-eed76f7e0a0f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/magnifi_Phi3_intent_v56_3_w_unknown_5_lr_0.002/1762652580.32982", - "retrieved_timestamp": "1762652580.329825", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20181008612703183 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3281563256810973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41229166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1471908244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Orca-2-13b/4f9c7197-1eb6-45eb-851e-46707017fe7f.json 
b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Orca-2-13b/4f9c7197-1eb6-45eb-851e-46707017fe7f.json deleted file mode 100644 index 54849eb1ff544551dbcbdcae0f5843c982fadda2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Orca-2-13b/4f9c7197-1eb6-45eb-851e-46707017fe7f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_Orca-2-13b/1762652580.3541", - "retrieved_timestamp": "1762652580.3541", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/Orca-2-13b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Orca-2-13b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3127933882099496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48844897288396094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5129687500000001 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27493351063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Orca-2-7b/c13a5d55-44f7-43fc-a633-9af7677a26fb.json b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Orca-2-7b/c13a5d55-44f7-43fc-a633-9af7677a26fb.json deleted file mode 100644 index 2ff396fb94a2b9b50a6d86d8a868194f48f0d30b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Orca-2-7b/c13a5d55-44f7-43fc-a633-9af7677a26fb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_Orca-2-7b/1762652580.354311", - "retrieved_timestamp": "1762652580.354312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/Orca-2-7b", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Orca-2-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2183462102776189 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4452132267545943 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5026145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23188164893617022 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-medium-128k-instruct/0c2670d3-1fb5-4825-860f-dc84dbd7bb99.json b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-medium-128k-instruct/0c2670d3-1fb5-4825-860f-dc84dbd7bb99.json deleted file mode 100644 index 18046884c033aa0de06308c04c50794f7320115b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-medium-128k-instruct/0c2670d3-1fb5-4825-860f-dc84dbd7bb99.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-medium-128k-instruct/1762652580.354526", - "retrieved_timestamp": "1762652580.354527", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/Phi-3-medium-128k-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3-medium-128k-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.6040029344361849 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6382322530870549 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4129479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47116023936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-medium-4k-instruct/1b921ad2-9ed3-46d5-ab65-f125ce97b35f.json b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-medium-4k-instruct/1b921ad2-9ed3-46d5-ab65-f125ce97b35f.json deleted file mode 100644 index 03ef8bc6cfe2f2b5047d38b78a7da86bf90cef66..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-medium-4k-instruct/1b921ad2-9ed3-46d5-ab65-f125ce97b35f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-medium-4k-instruct/1762652580.354986", - "retrieved_timestamp": "1762652580.35499", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/Phi-3-medium-4k-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3-medium-4k-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6422713954529538 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6412464890555547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.19561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4675864361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-mini-128k-instruct/0bcfeb34-8944-4f16-83d8-6fe851c39af6.json b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-mini-128k-instruct/0bcfeb34-8944-4f16-83d8-6fe851c39af6.json deleted file mode 100644 index 17204a4c1313c0ff79d9c90fb8124730d7555ad9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-mini-128k-instruct/0bcfeb34-8944-4f16-83d8-6fe851c39af6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-mini-128k-instruct/1762652580.355347", - "retrieved_timestamp": "1762652580.3553479", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/Phi-3-mini-128k-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3-mini-128k-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5976331688807919 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5574531792679852 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1404833836858006 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3936875 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3734208776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-mini-4k-instruct/0c861cdd-1ddb-43a1-991b-300887e1da1b.json b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-mini-4k-instruct/0c861cdd-1ddb-43a1-991b-300887e1da1b.json deleted file mode 100644 index 2a21db0486709a7aaf7fe0ff5aaaf8371699813f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-mini-4k-instruct/0c861cdd-1ddb-43a1-991b-300887e1da1b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-mini-4k-instruct/1762652580.355623", - "retrieved_timestamp": "1762652580.355624", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/Phi-3-mini-4k-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3-mini-4k-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5612884923115112 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5675972626334875 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3950208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38663563829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-mini-4k-instruct/97e50198-ba06-4c17-81d3-59270b71a89d.json 
b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-mini-4k-instruct/97e50198-ba06-4c17-81d3-59270b71a89d.json deleted file mode 100644 index a70ef8dc2d427690b23b3c42b4e5992cd3ee46de..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-mini-4k-instruct/97e50198-ba06-4c17-81d3-59270b71a89d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-mini-4k-instruct/1762652580.355825", - "retrieved_timestamp": "1762652580.355826", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/Phi-3-mini-4k-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3-mini-4k-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.547674614467391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5490718919495822 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42841666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4021775265957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-small-128k-instruct/f7c1a443-006b-4ade-9b0f-895392e52b7c.json b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-small-128k-instruct/f7c1a443-006b-4ade-9b0f-895392e52b7c.json deleted file mode 100644 index da0bdaff40780ad6d85bc4d3b5d3f624757fcc6f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-small-128k-instruct/f7c1a443-006b-4ade-9b0f-895392e52b7c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-small-128k-instruct/1762652580.356006", - "retrieved_timestamp": "1762652580.356006", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/Phi-3-small-128k-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3-small-128k-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6368258443153056 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6202176778696983 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2026086956521739 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43784375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4490525265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3SmallForCausalLM", - "params_billions": 7.392 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-small-8k-instruct/f4c62b5d-fc1d-4421-9be8-e7e4af642284.json b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-small-8k-instruct/f4c62b5d-fc1d-4421-9be8-e7e4af642284.json deleted file mode 100644 index 53e68f07f3e8c15b6123082c4ac72984ff9f107d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3-small-8k-instruct/f4c62b5d-fc1d-4421-9be8-e7e4af642284.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3-small-8k-instruct/1762652580.356211", - "retrieved_timestamp": "1762652580.356212", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/Phi-3-small-8k-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3-small-8k-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6496651107949131 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6208364880870563 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18869565217391304 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45579166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4506316489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3SmallForCausalLM", - "params_billions": 7.392 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3.5-MoE-instruct/ae57c3e7-4042-43eb-baa2-b033d1b4867c.json b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3.5-MoE-instruct/ae57c3e7-4042-43eb-baa2-b033d1b4867c.json deleted file mode 100644 index ce486cfd89a042923c0fbfe27de1c5a83999b43f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3.5-MoE-instruct/ae57c3e7-4042-43eb-baa2-b033d1b4867c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3.5-MoE-instruct/1762652580.356415", - "retrieved_timestamp": "1762652580.356415", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/Phi-3.5-MoE-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3.5-MoE-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.692454908531585 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.640762564622586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3119335347432024 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4564791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46575797872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 42.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3.5-mini-instruct/42448d73-f9e0-4eb2-bd6a-74614d08d55c.json b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3.5-mini-instruct/42448d73-f9e0-4eb2-bd6a-74614d08d55c.json deleted file mode 100644 index 2edd0466336a2deab15f9f2e50504c3397511bfc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-3.5-mini-instruct/42448d73-f9e0-4eb2-bd6a-74614d08d55c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-3.5-mini-instruct/1762652580.356627", - "retrieved_timestamp": "1762652580.356628", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/Phi-3.5-mini-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-3.5-mini-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5774500547436359 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5517785126111956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.402125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39619348404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-4-mini-instruct/1d02fe1c-f31d-4d38-a8c3-dc427e25cb80.json b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-4-mini-instruct/1d02fe1c-f31d-4d38-a8c3-dc427e25cb80.json deleted file mode 100644 index f5c81f74ab389a5382782525782baa9496d29932..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_Phi-4-mini-instruct/1d02fe1c-f31d-4d38-a8c3-dc427e25cb80.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_Phi-4-mini-instruct/1762652580.356846", - "retrieved_timestamp": "1762652580.356847", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/Phi-4-mini-instruct", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/Phi-4-mini-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7377923908562614 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.568862935505404 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16993957703927492 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39320146276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 3.836 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_phi-1/b88d579f-6bc7-4aee-a117-28786cba3300.json b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_phi-1/b88d579f-6bc7-4aee-a117-28786cba3300.json deleted file mode 100644 index a91093514b8da36fbabf22a2fb138d03f071aad3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_phi-1/b88d579f-6bc7-4aee-a117-28786cba3300.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_phi-1/1762652580.357049", - "retrieved_timestamp": "1762652580.3570502", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/phi-1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/phi-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20680571993421898 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31394755895837845 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35251041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11619015957446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "PhiForCausalLM", - "params_billions": 1.418 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_phi-1_5/0bc55439-f6a1-4588-858a-082907876d6e.json b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_phi-1_5/0bc55439-f6a1-4588-858a-082907876d6e.json deleted file mode 100644 index 8e9ce12cc75b6471faa43bd436609f05181d1037..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_phi-1_5/0bc55439-f6a1-4588-858a-082907876d6e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_phi-1_5/1762652580.357298", - "retrieved_timestamp": "1762652580.357298", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/phi-1_5", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/phi-1_5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2032839532440591 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33597583211996657 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34041666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16913231382978725 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 1.418 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_phi-2/e38ef3e4-585f-46de-beb4-c794d767b579.json b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_phi-2/e38ef3e4-585f-46de-beb4-c794d767b579.json deleted file mode 100644 index ba898b966768a84a87461e0c24bb05ba82134b8e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_phi-2/e38ef3e4-585f-46de-beb4-c794d767b579.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_phi-2/1762652580.357496", - "retrieved_timestamp": "1762652580.357497", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/phi-2", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/phi-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.273875539125077 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4881208771249696 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4098958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26279920212765956 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_phi-4/5481936f-d52a-486b-871e-d2e48c1b0278.json b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_phi-4/5481936f-d52a-486b-871e-d2e48c1b0278.json deleted file mode 100644 index 774da0dc6b91b4b6c3bdc18758ee7f0382a46356..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_phi-4/5481936f-d52a-486b-871e-d2e48c1b0278.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_phi-4/1762652580.357901", - "retrieved_timestamp": "1762652580.357902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/phi-4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/phi-4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0585269307659233 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6690562305322874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3164652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40604026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5286735372340425 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_phi-4/f3ee4f04-22f1-4ddb-afb2-27b8f641042b.json b/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_phi-4/f3ee4f04-22f1-4ddb-afb2-27b8f641042b.json deleted file mode 100644 index c516533cb37c760f61b27be0a22fca1be59f68fd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/microsoft_phi-4/f3ee4f04-22f1-4ddb-afb2-27b8f641042b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_phi-4/1762652580.3577", - "retrieved_timestamp": "1762652580.357701", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/phi-4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "microsoft/phi-4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.048785001573602486 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6703464626619114 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27870090634441086 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5295046542553191 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Phi3ForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/migtissera_Tess-v2.5-Phi-3-medium-128k-14B/260f2500-c920-4e3f-901b-10efc03f0390.json b/leaderboard_data/HFOpenLLMv2/microsoft/migtissera_Tess-v2.5-Phi-3-medium-128k-14B/260f2500-c920-4e3f-901b-10efc03f0390.json deleted file mode 100644 index 9ca82ee9cc29752a35057c0270d614d9dc52cefc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/migtissera_Tess-v2.5-Phi-3-medium-128k-14B/260f2500-c920-4e3f-901b-10efc03f0390.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/migtissera_Tess-v2.5-Phi-3-medium-128k-14B/1762652580.35902", - "retrieved_timestamp": "1762652580.359021", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "migtissera/Tess-v2.5-Phi-3-medium-128k-14B", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "migtissera/Tess-v2.5-Phi-3-medium-128k-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45387682460316403 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6206613823135703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41130208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3731715425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 13.96 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/mkurman_phi-4-MedIT-11B-exp-1/d64a8825-610a-4128-8c68-55150a76ed88.json b/leaderboard_data/HFOpenLLMv2/microsoft/mkurman_phi-4-MedIT-11B-exp-1/d64a8825-610a-4128-8c68-55150a76ed88.json deleted file mode 100644 index 
df7850dc243970ee25e82690e21c561ec3ca73d9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/mkurman_phi-4-MedIT-11B-exp-1/d64a8825-610a-4128-8c68-55150a76ed88.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mkurman_phi-4-MedIT-11B-exp-1/1762652580.3661451", - "retrieved_timestamp": "1762652580.366146", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mkurman/phi-4-MedIT-11B-exp-1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "mkurman/phi-4-MedIT-11B-exp-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5947607902587357 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5413943771388249 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38479166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38248005319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Phi3ForCausalLM", - "params_billions": 11.514 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/mkurman_phi4-MedIT-10B-o1/c5a2a30d-99b0-4658-97f5-4c9be5576073.json b/leaderboard_data/HFOpenLLMv2/microsoft/mkurman_phi4-MedIT-10B-o1/c5a2a30d-99b0-4658-97f5-4c9be5576073.json deleted file mode 100644 index b438137ac61c55486b99e4963987e95397f31059..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/mkurman_phi4-MedIT-10B-o1/c5a2a30d-99b0-4658-97f5-4c9be5576073.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mkurman_phi4-MedIT-10B-o1/1762652580.366463", - "retrieved_timestamp": "1762652580.366464", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mkurman/phi4-MedIT-10B-o1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "mkurman/phi4-MedIT-10B-o1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34629117408476173 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519820312240642 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1148036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39679166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3507313829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaMedITForCausalLM", - "params_billions": 10.255 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/mlabonne_phixtral-2x2_8/ec051c9b-9399-4c8d-8710-6a182a234890.json b/leaderboard_data/HFOpenLLMv2/microsoft/mlabonne_phixtral-2x2_8/ec051c9b-9399-4c8d-8710-6a182a234890.json deleted file mode 100644 index 589ea246066f36ee01d8d49e113f0ced7fb5ba39..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/mlabonne_phixtral-2x2_8/ec051c9b-9399-4c8d-8710-6a182a234890.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlabonne_phixtral-2x2_8/1762652580.370162", - "retrieved_timestamp": "1762652580.370163", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlabonne/phixtral-2x2_8", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "mlabonne/phixtral-2x2_8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3431184811854767 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48885941873076205 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3643541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550698138297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 4.458 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/mrm8488_phi-4-14B-grpo-gsm8k-3e/1bd4d2fe-cd83-4a79-b102-40be8ebb6245.json b/leaderboard_data/HFOpenLLMv2/microsoft/mrm8488_phi-4-14B-grpo-gsm8k-3e/1bd4d2fe-cd83-4a79-b102-40be8ebb6245.json deleted file mode 100644 index 99101a7f94558af4d0e11fc749d13914cbc91659..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/mrm8488_phi-4-14B-grpo-gsm8k-3e/1bd4d2fe-cd83-4a79-b102-40be8ebb6245.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mrm8488_phi-4-14B-grpo-gsm8k-3e/1762652580.374398", - "retrieved_timestamp": "1762652580.374399", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mrm8488/phi-4-14B-grpo-gsm8k-3e", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "mrm8488/phi-4-14B-grpo-gsm8k-3e" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.688533092195375 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6805415739665394 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.452416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy 
on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39939583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.526845079787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/mrm8488_phi-4-14B-grpo-limo/e671d26c-1d8a-4d22-b360-dc3e449886b8.json b/leaderboard_data/HFOpenLLMv2/microsoft/mrm8488_phi-4-14B-grpo-limo/e671d26c-1d8a-4d22-b360-dc3e449886b8.json deleted file mode 100644 index 33afc68ba59457a5fad8fe719e2b57782ac2e174..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/mrm8488_phi-4-14B-grpo-limo/e671d26c-1d8a-4d22-b360-dc3e449886b8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mrm8488_phi-4-14B-grpo-limo/1762652580.374649", - "retrieved_timestamp": "1762652580.37465", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mrm8488/phi-4-14B-grpo-limo", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "mrm8488/phi-4-14B-grpo-limo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.681239112222237 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.678485424233919 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4569486404833837 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3980625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.5260970744680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/netcat420_MFANN-abliterated-phi2-merge-unretrained/a3c07d22-20d1-4878-80d5-04b949580829.json b/leaderboard_data/HFOpenLLMv2/microsoft/netcat420_MFANN-abliterated-phi2-merge-unretrained/a3c07d22-20d1-4878-80d5-04b949580829.json deleted file mode 100644 index 52df068ee00b6d8d6ac5ddbbd13df772403a4104..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/netcat420_MFANN-abliterated-phi2-merge-unretrained/a3c07d22-20d1-4878-80d5-04b949580829.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-abliterated-phi2-merge-unretrained/1762652580.3939252", - "retrieved_timestamp": "1762652580.393926", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN-abliterated-phi2-merge-unretrained", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "netcat420/MFANN-abliterated-phi2-merge-unretrained" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3005037744296245 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4104131503721586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31834375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14777260638297873 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.775 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/netcat420_MFANN-phigments-slerp-V2/8b4f2ab4-dcd7-4c5d-9bd0-6d7e1580c123.json b/leaderboard_data/HFOpenLLMv2/microsoft/netcat420_MFANN-phigments-slerp-V2/8b4f2ab4-dcd7-4c5d-9bd0-6d7e1580c123.json deleted file 
mode 100644 index 2d7be6f179824d3e0dffd8f0f2ebbf6ceedb6c10..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/netcat420_MFANN-phigments-slerp-V2/8b4f2ab4-dcd7-4c5d-9bd0-6d7e1580c123.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-phigments-slerp-V2/1762652580.3950222", - "retrieved_timestamp": "1762652580.395023", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN-phigments-slerp-V2", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "netcat420/MFANN-phigments-slerp-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32316032571355113 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48272762171598743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40372916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2716921542553192 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/netcat420_MFANN-phigments-slerp-V3.2/8c4e85ce-7b8f-479c-a1dc-114c7e5ba4f1.json b/leaderboard_data/HFOpenLLMv2/microsoft/netcat420_MFANN-phigments-slerp-V3.2/8c4e85ce-7b8f-479c-a1dc-114c7e5ba4f1.json deleted file mode 100644 index c312d9693b516de47a7e0f36c31369923f7c6e09..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/netcat420_MFANN-phigments-slerp-V3.2/8c4e85ce-7b8f-479c-a1dc-114c7e5ba4f1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-phigments-slerp-V3.2/1762652580.395236", - "retrieved_timestamp": "1762652580.395236", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN-phigments-slerp-V3.2", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "netcat420/MFANN-phigments-slerp-V3.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35243598097492435 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4808549324972969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3707708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2705285904255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/netcat420_MFANN-phigments-slerp-V3.3/b3466ac6-df1f-4440-9d7b-7991cac7d733.json b/leaderboard_data/HFOpenLLMv2/microsoft/netcat420_MFANN-phigments-slerp-V3.3/b3466ac6-df1f-4440-9d7b-7991cac7d733.json deleted file mode 100644 index 66cdcbe302074d59e5499d07d12ce5bf40358251..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/netcat420_MFANN-phigments-slerp-V3.3/b3466ac6-df1f-4440-9d7b-7991cac7d733.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-phigments-slerp-V3.3/1762652580.395446", - "retrieved_timestamp": "1762652580.395447", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN-phigments-slerp-V3.3", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "netcat420/MFANN-phigments-slerp-V3.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36909732842192056 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48952950463630956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38921874999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802526595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/pankajmathur_orca_mini_phi-4/f5971ede-de93-4729-8a03-b9ec3abea21e.json b/leaderboard_data/HFOpenLLMv2/microsoft/pankajmathur_orca_mini_phi-4/f5971ede-de93-4729-8a03-b9ec3abea21e.json deleted file mode 100644 index 5c89476bc981d0fe665322d3861974ab21e48ae4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/pankajmathur_orca_mini_phi-4/f5971ede-de93-4729-8a03-b9ec3abea21e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_phi-4/1762652580.435327", - "retrieved_timestamp": "1762652580.435328", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_phi-4", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_phi-4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7780588837617521 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6856329737542378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.29531722054380666 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47030208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5255152925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-Empathetic/a7a2af83-7047-4601-bfdd-ac25abf3890d.json b/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-Empathetic/a7a2af83-7047-4601-bfdd-ac25abf3890d.json deleted file mode 100644 index 70787926a920834effdadb20aba992d183b2220b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-Empathetic/a7a2af83-7047-4601-bfdd-ac25abf3890d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Empathetic/1762652580.469516", - "retrieved_timestamp": "1762652580.469517", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Phi-4-Empathetic", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi-4-Empathetic" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.049659348306936704 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6726820578371974 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2620845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49913541666666666 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5065658244680851 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-Math-IO/88c03059-5add-46ea-b423-4cf8496c5763.json b/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-Math-IO/88c03059-5add-46ea-b423-4cf8496c5763.json deleted file mode 100644 index 9fa80a65dea16c2ed3358b1b21cec1f321a591b3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-Math-IO/88c03059-5add-46ea-b423-4cf8496c5763.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Math-IO/1762652580.469801", - "retrieved_timestamp": "1762652580.469801", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Phi-4-Math-IO", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi-4-Math-IO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05897684809638426 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6668255086606543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45770392749244715 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39848993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4872916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5205285904255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-QwQ/8e84f2de-117a-4526-9d58-86a63011a07f.json 
b/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-QwQ/8e84f2de-117a-4526-9d58-86a63011a07f.json deleted file mode 100644 index 24a4000dcf7f38d72aad436a007292c3b6f5b1cd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-QwQ/8e84f2de-117a-4526-9d58-86a63011a07f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-QwQ/1762652580.470021", - "retrieved_timestamp": "1762652580.470022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Phi-4-QwQ", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi-4-QwQ" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05592937849350833 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6695574237334824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45770392749244715 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4650625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5275099734042553 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-Super-1/91c5f088-38fd-4ea7-bf95-3d6a69653cca.json b/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-Super-1/91c5f088-38fd-4ea7-bf95-3d6a69653cca.json deleted file mode 100644 index 3403bd59ee01d99a704c958347c239554f027345..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-Super-1/91c5f088-38fd-4ea7-bf95-3d6a69653cca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Super-1/1762652580.470496", - "retrieved_timestamp": "1762652580.470498", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], 
- "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Phi-4-Super-1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi-4-Super-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04176584795010572 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.672933647971901 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35196374622356497 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5017395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5235206117021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-Super-o1/b90749f4-0542-42b6-a708-4e14bc586ad1.json b/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-Super-o1/b90749f4-0542-42b6-a708-4e14bc586ad1.json deleted file mode 100644 index e1c1eebda77f08906b24da87cd78e13551aa4e18..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-Super-o1/b90749f4-0542-42b6-a708-4e14bc586ad1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Super-o1/1762652580.470741", - "retrieved_timestamp": "1762652580.470741", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Phi-4-Super-o1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi-4-Super-o1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.04176584795010572 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.672933647971901 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35196374622356497 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5017395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5235206117021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-Super/ec19309c-9bbe-4d42-894d-3638dbe5dfac.json b/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-Super/ec19309c-9bbe-4d42-894d-3638dbe5dfac.json deleted file mode 100644 index a8bce0272c122ab1a75f8654637d00de285736fe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-Super/ec19309c-9bbe-4d42-894d-3638dbe5dfac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-Super/1762652580.470242", - "retrieved_timestamp": "1762652580.470242", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Phi-4-Super", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi-4-Super" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04813561350549875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6720116458521787 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34894259818731116 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.526595744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-o1/d58bf1bb-e269-4741-a9f1-be242443ad4a.json b/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-o1/d58bf1bb-e269-4741-a9f1-be242443ad4a.json deleted file mode 100644 index dfba4211bc631999d12156e3854f054b25e55d07..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi-4-o1/d58bf1bb-e269-4741-a9f1-be242443ad4a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi-4-o1/1762652580.470958", - "retrieved_timestamp": "1762652580.4709592", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Phi-4-o1", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi-4-o1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028976449154908976 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6688727399756971 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3995468277945619 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49777083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173703457446809 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi4-Super/07ee76dd-a928-469b-912e-cfd2e0a26ef9.json b/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi4-Super/07ee76dd-a928-469b-912e-cfd2e0a26ef9.json deleted file mode 100644 index 77b43b1e41c3beef840b8fedcfb0180315289546..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/microsoft/prithivMLmods_Phi4-Super/07ee76dd-a928-469b-912e-cfd2e0a26ef9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Phi4-Super/1762652580.471183", - "retrieved_timestamp": "1762652580.4711838", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Phi4-Super", - "developer": "microsoft", - "inference_platform": "unknown", - "id": "prithivMLmods/Phi4-Super" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04813561350549875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6720116458521787 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34894259818731116 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.526595744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/microsoft/rhysjones_phi-2-orange-v2/bf679659-f55f-43c8-86b5-ed7805e8c3ee.json b/leaderboard_data/HFOpenLLMv2/microsoft/rhysjones_phi-2-orange-v2/bf679659-f55f-43c8-86b5-ed7805e8c3ee.json deleted file mode 100644 index 50a25654e099d487771f44493fb33ffacd8f3a87..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/microsoft/rhysjones_phi-2-orange-v2/bf679659-f55f-43c8-86b5-ed7805e8c3ee.json
+++ /dev/null

[Data-file deletions, condensed. From here through NousResearch/Hermes-2-Pro-Mistral-7B the diff deletes one JSON record per model at leaderboard_data/HFOpenLLMv2/<developer>/<org>_<model>/<uuid>.json, each as a `@@ -1,107 +0,0 @@` hunk with no trailing newline. Every record carries identical boilerplate: schema_version 0.0.1; source_data https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted; evaluation_source "HF Open LLM v2" (type: leaderboard); source_metadata Hugging Face / third_party; inference_platform "unknown"; and the same metric_config per benchmark (continuous score in [0, 1], higher is better; Accuracy for IFEval, BBH, GPQA, MUSR, MMLU-PRO and Exact Match for MATH Level 5). All retrieved_timestamp values fall in 1762652579-1762652580. The fields that actually vary per record are tabulated below; scores are rounded to 4 decimals. migtissera/Trinity-2-Codestral-22B-v0.2 was recorded twice (two UUIDs, different precision); both rows are kept.]

| Model id | Dir (developer) | Precision | Architecture | Params (B) | IFEval | BBH | MATH Lvl 5 | GPQA | MUSR | MMLU-PRO |
|---|---|---|---|---|---|---|---|---|---|---|
| rhysjones/phi-2-orange-v2 | microsoft | float16 | PhiForCausalLM | 2.78 | 0.3670 | 0.4770 | 0.0408 | 0.2617 | 0.3630 | 0.2532 |
| suayptalha/Luminis-phi-4 | microsoft | bfloat16 | LlamaForCausalLM | 14.66 | 0.6900 | 0.6920 | 0.4637 | 0.3515 | 0.4572 | 0.5424 |
| tensopolis/phi-4-tensopolis-v1 | microsoft | bfloat16 | LlamaForCausalLM | 14.66 | 0.6767 | 0.6872 | 0.4940 | 0.3347 | 0.4141 | 0.5384 |
| theprint/phi-3-mini-4k-python | microsoft | bfloat16 | ? | 4.132 | 0.2409 | 0.4938 | 0.1050 | 0.2911 | 0.3922 | 0.3577 |
| unsloth/phi-4-bnb-4bit | microsoft | bfloat16 | LlamaForCausalLM | 8.058 | 0.6730 | 0.6770 | 0.4607 | 0.3381 | 0.4007 | 0.5256 |
| unsloth/phi-4-unsloth-bnb-4bit | microsoft | bfloat16 | LlamaForCausalLM | 8.483 | 0.6794 | 0.6791 | 0.4562 | 0.3364 | 0.4034 | 0.5286 |
| unsloth/phi-4 | microsoft | bfloat16 | LlamaForCausalLM | 14.66 | 0.6882 | 0.6886 | 0.5000 | 0.3364 | 0.4114 | 0.5378 |
| uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b | microsoft | bfloat16 | MistralForCausalLM | 7.242 | 0.3700 | 0.4983 | 0.0295 | 0.2836 | 0.4361 | 0.2990 |
| migtissera/Tess-3-7B-SFT | migtissera | bfloat16 | MistralForCausalLM | 7.248 | 0.3946 | 0.4607 | 0.0400 | 0.2710 | 0.4113 | 0.3034 |
| migtissera/Trinity-2-Codestral-22B-v0.2 | migtissera | float16 | MistralForCausalLM | 22.247 | 0.4345 | 0.5686 | 0.0838 | 0.3003 | 0.4045 | 0.3340 |
| migtissera/Trinity-2-Codestral-22B-v0.2 | migtissera | bfloat16 | MistralForCausalLM | 22.247 | 0.4430 | 0.5706 | 0.0869 | 0.3079 | 0.4031 | 0.3354 |
| migtissera/Trinity-2-Codestral-22B | migtissera | bfloat16 | MistralForCausalLM | 22.247 | 0.4202 | 0.5593 | 0.0967 | 0.3146 | 0.4111 | 0.3308 |
| ministral/Ministral-3b-instruct | ministral | bfloat16 | MistralForCausalLM | 3.316 | 0.1358 | 0.3192 | 0.0083 | 0.2517 | 0.3383 | 0.1093 |
| Corianas/Neural-Mistral-7B | mistral | float16 | MistralForCausalLM | 7.242 | 0.5489 | 0.4428 | 0.0189 | 0.2836 | 0.3873 | 0.2738 |
| Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7 | mistral | bfloat16 | MistralForCausalLM | 7.0 | 0.5124 | 0.4750 | 0.0340 | 0.2961 | 0.4005 | 0.2744 |
| Dans-DiscountModels/mistral-7b-test-merged | mistral | bfloat16 | MistralForCausalLM | 7.0 | 0.6678 | 0.4898 | 0.0446 | 0.2945 | 0.3754 | 0.2978 |
| DreadPoor/felix_dies-mistral-7B-model_stock | mistral | bfloat16 | MistralForCausalLM | 7.242 | 0.3008 | 0.4901 | 0.0536 | 0.2919 | 0.4518 | 0.3109 |
| EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo | mistral | bfloat16 | MistralForCausalLM | 11.58 | 0.3097 | 0.4328 | 0.0370 | 0.2634 | 0.4030 | 0.1148 |
| FuJhen/mistral_7b_v0.1_structedData_e2e | mistral | bfloat16 | ? | 7.0 | 0.1727 | 0.4114 | 0.0045 | 0.2794 | 0.3723 | 0.2811 |
| FuJhen/mistral_7b_v0.1_structedData_viggo | mistral | bfloat16 | ? | 14.483 | 0.1783 | 0.4524 | 0.0287 | 0.2836 | 0.3738 | 0.2942 |
| Locutusque/TinyMistral-248M-v2.5 | mistral | float16 | MistralForCausalLM | 0.248 | 0.1336 | 0.3039 | 0.0098 | 0.2508 | 0.3782 | 0.1135 |
| M4-ai/TinyMistral-248M-v3 | mistral | bfloat16 | MistralForCausalLM | 0.248 | 0.1639 | 0.2885 | 0.0045 | 0.2408 | 0.3793 | 0.1132 |
| Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial | mistral | bfloat16 | MixtralForCausalLM | 24.16 | 0.1697 | 0.3464 | 0.0144 | 0.2592 | 0.3991 | 0.1379 |
| NousResearch/DeepHermes-3-Mistral-24B-Preview | mistral | bfloat16 | MistralForCausalLM | 23.572 | 0.4536 | 0.6488 | 0.2576 | 0.3700 | 0.4503 | 0.4590 |
| NousResearch/Hermes-2-Pro-Mistral-7B | mistral | bfloat16 | MistralForCausalLM | 7.242 | 0.5668 | 0.4995 | 0.0604 | 0.2735 | 0.4376 | 0.2946 |
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45902593085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/NousResearch_Hermes-2-Pro-Mistral-7B/b8d954d0-a820-4927-a7f8-b0083cf9db9c.json b/leaderboard_data/HFOpenLLMv2/mistral/NousResearch_Hermes-2-Pro-Mistral-7B/b8d954d0-a820-4927-a7f8-b0083cf9db9c.json deleted file mode 100644 index 9ef2e2e675f632286d2b42d373bbd609f0668a52..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/NousResearch_Hermes-2-Pro-Mistral-7B/b8d954d0-a820-4927-a7f8-b0083cf9db9c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Hermes-2-Pro-Mistral-7B/1762652579.790145", - "retrieved_timestamp": "1762652579.790146", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Hermes-2-Pro-Mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "NousResearch/Hermes-2-Pro-Mistral-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5668337788179807 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4995435330498075 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43759375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29463098404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/NousResearch_Yarn-Mistral-7b-128k/c6411eb6-8304-49e6-ac7b-5300deb27c55.json b/leaderboard_data/HFOpenLLMv2/mistral/NousResearch_Yarn-Mistral-7b-128k/c6411eb6-8304-49e6-ac7b-5300deb27c55.json deleted file 
mode 100644 index 7b65b043cdfece75f29a6c2cb32c3de54b9b0ba2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/NousResearch_Yarn-Mistral-7b-128k/c6411eb6-8304-49e6-ac7b-5300deb27c55.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Mistral-7b-128k/1762652579.793008", - "retrieved_timestamp": "1762652579.7930088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Yarn-Mistral-7b-128k", - "developer": "mistral", - "inference_platform": "unknown", - "id": "NousResearch/Yarn-Mistral-7b-128k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19336693307091848 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4314467711273296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.289311835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/NousResearch_Yarn-Mistral-7b-64k/c7fcd944-78ab-422d-b0ef-8dc394266473.json b/leaderboard_data/HFOpenLLMv2/mistral/NousResearch_Yarn-Mistral-7b-64k/c7fcd944-78ab-422d-b0ef-8dc394266473.json deleted file mode 100644 index caac7fc8ea19eec23821940bb27dc001442c80f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/NousResearch_Yarn-Mistral-7b-64k/c7fcd944-78ab-422d-b0ef-8dc394266473.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Mistral-7b-64k/1762652579.7932239", - "retrieved_timestamp": "1762652579.793225", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF 
Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NousResearch/Yarn-Mistral-7b-64k", - "developer": "mistral", - "inference_platform": "unknown", - "id": "NousResearch/Yarn-Mistral-7b-64k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2079548930171944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42931904551037814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41238541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2913896276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/Open-Orca_Mistral-7B-OpenOrca/c6e0aa8c-8765-4e2f-a6b2-cdeb885d29a4.json b/leaderboard_data/HFOpenLLMv2/mistral/Open-Orca_Mistral-7B-OpenOrca/c6e0aa8c-8765-4e2f-a6b2-cdeb885d29a4.json deleted file mode 100644 index 72cd9669a55ef1fd1a503b5108d1a2a63dbe7f5b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/Open-Orca_Mistral-7B-OpenOrca/c6e0aa8c-8765-4e2f-a6b2-cdeb885d29a4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Open-Orca_Mistral-7B-OpenOrca/1762652579.799384", - "retrieved_timestamp": "1762652579.799385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Open-Orca/Mistral-7B-OpenOrca", - "developer": "mistral", - "inference_platform": "unknown", - "id": "Open-Orca/Mistral-7B-OpenOrca" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4977659277384008 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4768173517353546 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38578124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26529255319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/PranavHarshan_LaMistral-V4/21944667-04e0-46dc-9896-eef32c26fa6b.json b/leaderboard_data/HFOpenLLMv2/mistral/PranavHarshan_LaMistral-V4/21944667-04e0-46dc-9896-eef32c26fa6b.json deleted file mode 100644 index 9b8003fec97a54d3b4b1e91e01c8eb7f0d254d92..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/PranavHarshan_LaMistral-V4/21944667-04e0-46dc-9896-eef32c26fa6b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/PranavHarshan_LaMistral-V4/1762652579.8148758", - "retrieved_timestamp": "1762652579.814877", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "PranavHarshan/LaMistral-V4", - "developer": "mistral", - "inference_platform": "unknown", - "id": "PranavHarshan/LaMistral-V4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.623861354539289 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5184255342586473 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": 
{ - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3642916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35987367021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/Pretergeek_openchat-3.5-0106_Rebased_Mistral-7B-v0.2/56d07a1f-1f1f-4559-b57d-bee3bf884860.json b/leaderboard_data/HFOpenLLMv2/mistral/Pretergeek_openchat-3.5-0106_Rebased_Mistral-7B-v0.2/56d07a1f-1f1f-4559-b57d-bee3bf884860.json deleted file mode 100644 index d47f1582637b6b75c9b19a583b3d5728a3d90840..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/Pretergeek_openchat-3.5-0106_Rebased_Mistral-7B-v0.2/56d07a1f-1f1f-4559-b57d-bee3bf884860.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Pretergeek_openchat-3.5-0106_Rebased_Mistral-7B-v0.2/1762652579.817152", - "retrieved_timestamp": "1762652579.817153", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37062106322335847 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36271140677296004 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4840104166666667 - } - }, - 
{ - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2829953457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/TTTXXX01_Mistral-7B-Base-SimPO2-5e-7/062d38c7-07e6-4f71-a7a3-e40a187b6f77.json b/leaderboard_data/HFOpenLLMv2/mistral/TTTXXX01_Mistral-7B-Base-SimPO2-5e-7/062d38c7-07e6-4f71-a7a3-e40a187b6f77.json deleted file mode 100644 index a34889d7289659b016483f6cd61eb01298426949..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/TTTXXX01_Mistral-7B-Base-SimPO2-5e-7/062d38c7-07e6-4f71-a7a3-e40a187b6f77.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TTTXXX01_Mistral-7B-Base-SimPO2-5e-7/1762652579.911438", - "retrieved_timestamp": "1762652579.9114392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TTTXXX01/Mistral-7B-Base-SimPO2-5e-7", - "developer": "mistral", - "inference_platform": "unknown", - "id": "TTTXXX01/Mistral-7B-Base-SimPO2-5e-7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43918912928806675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43195515014882774 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36041666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2765957446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/TencentARC_MetaMath-Mistral-Pro/c2274449-ebc7-4e53-94bf-82e1f6810f6b.json 
b/leaderboard_data/HFOpenLLMv2/mistral/TencentARC_MetaMath-Mistral-Pro/c2274449-ebc7-4e53-94bf-82e1f6810f6b.json deleted file mode 100644 index b5d6e3087e68540152e14c69c5c55eaf4359bb96..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/TencentARC_MetaMath-Mistral-Pro/c2274449-ebc7-4e53-94bf-82e1f6810f6b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TencentARC_MetaMath-Mistral-Pro/1762652579.913366", - "retrieved_timestamp": "1762652579.913366", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TencentARC/MetaMath-Mistral-Pro", - "developer": "mistral", - "inference_platform": "unknown", - "id": "TencentARC/MetaMath-Mistral-Pro" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21187670935340452 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44131618555883606 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35241666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2471742021276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.987 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/TencentARC_Mistral_Pro_8B_v0.1/07ac72af-fa7e-4fe2-8a67-e893edbbd206.json b/leaderboard_data/HFOpenLLMv2/mistral/TencentARC_Mistral_Pro_8B_v0.1/07ac72af-fa7e-4fe2-8a67-e893edbbd206.json deleted file mode 100644 index 3e6e79a66a214da54785e3fef040918744f3ce47..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/TencentARC_Mistral_Pro_8B_v0.1/07ac72af-fa7e-4fe2-8a67-e893edbbd206.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/TencentARC_Mistral_Pro_8B_v0.1/1762652579.913616", - "retrieved_timestamp": "1762652579.913617", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "TencentARC/Mistral_Pro_8B_v0.1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "TencentARC/Mistral_Pro_8B_v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21145227995053123 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4525975968066435 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42422916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2765126329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.987 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/Triangle104_Mistral-Redemption-Arc/189f08b4-7e58-4820-9ff7-bcea4530e3dd.json b/leaderboard_data/HFOpenLLMv2/mistral/Triangle104_Mistral-Redemption-Arc/189f08b4-7e58-4820-9ff7-bcea4530e3dd.json deleted file mode 100644 index 919674bd849fcbf7f704bc9664c70cf8dfee0910..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/Triangle104_Mistral-Redemption-Arc/189f08b4-7e58-4820-9ff7-bcea4530e3dd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Mistral-Redemption-Arc/1762652579.929934", - "retrieved_timestamp": "1762652579.9299352", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Mistral-Redemption-Arc", - "developer": "mistral", - "inference_platform": "unknown", - "id": "Triangle104/Mistral-Redemption-Arc" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { 
- "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40289432040319684 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6254876729064861 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45951041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4509640957446808 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/Triangle104_Mistral-Small-24b-Harmony/e8d645e6-8ec4-4c0c-8cf2-8aa7e126e1f1.json b/leaderboard_data/HFOpenLLMv2/mistral/Triangle104_Mistral-Small-24b-Harmony/e8d645e6-8ec4-4c0c-8cf2-8aa7e126e1f1.json deleted file mode 100644 index f2c5c85701de8f037dfec15833dc1f0478f58e7d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/Triangle104_Mistral-Small-24b-Harmony/e8d645e6-8ec4-4c0c-8cf2-8aa7e126e1f1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Triangle104_Mistral-Small-24b-Harmony/1762652579.930191", - "retrieved_timestamp": "1762652579.9301918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Triangle104/Mistral-Small-24b-Harmony", - "developer": "mistral", - "inference_platform": "unknown", - "id": "Triangle104/Mistral-Small-24b-Harmony" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16871234989826994 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6433732705921861 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": 
"Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19108761329305135 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4276041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5430518617021277 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter1/01c4d932-bdcf-4840-83cb-e441585d70e2.json b/leaderboard_data/HFOpenLLMv2/mistral/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter1/01c4d932-bdcf-4840-83cb-e441585d70e2.json deleted file mode 100644 index 2dde8e96c2a3408968a51d0cc8b34fb6389f3d6b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter1/01c4d932-bdcf-4840-83cb-e441585d70e2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter1/1762652579.9377868", - "retrieved_timestamp": "1762652579.937788", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5047352136774869 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4468056921650662 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3991770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26953125 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter2/b0e6d5e1-3f41-4dfc-8845-b6d028820816.json b/leaderboard_data/HFOpenLLMv2/mistral/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter2/b0e6d5e1-3f41-4dfc-8845-b6d028820816.json deleted file mode 100644 index 0e74daaea2f2206e8abeed859a1d6e65884dca31..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter2/b0e6d5e1-3f41-4dfc-8845-b6d028820816.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter2/1762652579.937983", - "retrieved_timestamp": "1762652579.937984", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4445848127413041 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4465719945610438 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40854166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2677027925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end 
of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter3/66cc8076-71be-43fc-9efb-edd8ad19a6b6.json b/leaderboard_data/HFOpenLLMv2/mistral/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter3/66cc8076-71be-43fc-9efb-edd8ad19a6b6.json deleted file mode 100644 index 71d06afe8268dd99ac283c0f3481e960cd036250..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter3/66cc8076-71be-43fc-9efb-edd8ad19a6b6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO-Iter3/1762652579.938179", - "retrieved_timestamp": "1762652579.9381802", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3", - "developer": "mistral", - "inference_platform": "unknown", - "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4350678422142138 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4396587862984616 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40711458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2657912234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/UCLA-AGI_Mistral7B-PairRM-SPPO/01613adc-1206-4695-ae19-31f2b7ee0d9d.json b/leaderboard_data/HFOpenLLMv2/mistral/UCLA-AGI_Mistral7B-PairRM-SPPO/01613adc-1206-4695-ae19-31f2b7ee0d9d.json deleted file mode 100644 index 9145ab1161a88539a11ff7c91a21edb9733c5368..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/UCLA-AGI_Mistral7B-PairRM-SPPO/01613adc-1206-4695-ae19-31f2b7ee0d9d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/UCLA-AGI_Mistral7B-PairRM-SPPO/1762652579.93755", - "retrieved_timestamp": "1762652579.93755", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "UCLA-AGI/Mistral7B-PairRM-SPPO", - "developer": "mistral", - "inference_platform": "unknown", - "id": "UCLA-AGI/Mistral7B-PairRM-SPPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43549227161708715 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4438979817093698 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39647916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26205119680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/Unbabel_TowerInstruct-Mistral-7B-v0.2/cc6d8d11-2273-41fa-95eb-5d1f7d4a2311.json b/leaderboard_data/HFOpenLLMv2/mistral/Unbabel_TowerInstruct-Mistral-7B-v0.2/cc6d8d11-2273-41fa-95eb-5d1f7d4a2311.json deleted file mode 100644 index c9e703f6cc14d5c0048e5575962a724a4124db5b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/Unbabel_TowerInstruct-Mistral-7B-v0.2/cc6d8d11-2273-41fa-95eb-5d1f7d4a2311.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Unbabel_TowerInstruct-Mistral-7B-v0.2/1762652579.938655", - "retrieved_timestamp": "1762652579.938656", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Unbabel/TowerInstruct-Mistral-7B-v0.2", - "developer": "mistral", - 
"inference_platform": "unknown", - "id": "Unbabel/TowerInstruct-Mistral-7B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2843422119975 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.388195180992626 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4522291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19680851063829788 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/allknowingroger_Mistralmash1-7B-s/c5e7d08d-4430-43f6-a293-5381b2f13ca6.json b/leaderboard_data/HFOpenLLMv2/mistral/allknowingroger_Mistralmash1-7B-s/c5e7d08d-4430-43f6-a293-5381b2f13ca6.json deleted file mode 100644 index 0c48d6bea2aa38163ea81981e968542c00d317f9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/allknowingroger_Mistralmash1-7B-s/c5e7d08d-4430-43f6-a293-5381b2f13ca6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Mistralmash1-7B-s/1762652579.990727", - "retrieved_timestamp": "1762652579.990727", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Mistralmash1-7B-s", - "developer": "mistral", - "inference_platform": "unknown", - "id": "allknowingroger/Mistralmash1-7B-s" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39610012544493056 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5277485757172445 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4267083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3292885638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/allknowingroger_Mistralmash2-7B-s/7a9d4b20-e704-4f50-a09b-ccb67d417824.json b/leaderboard_data/HFOpenLLMv2/mistral/allknowingroger_Mistralmash2-7B-s/7a9d4b20-e704-4f50-a09b-ccb67d417824.json deleted file mode 100644 index 5b97665dd676c92b7ada944cb990be9f1962f204..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/allknowingroger_Mistralmash2-7B-s/7a9d4b20-e704-4f50-a09b-ccb67d417824.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allknowingroger_Mistralmash2-7B-s/1762652579.991016", - "retrieved_timestamp": "1762652579.9910169", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allknowingroger/Mistralmash2-7B-s", - "developer": "mistral", - "inference_platform": "unknown", - "id": "allknowingroger/Mistralmash2-7B-s" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4101883003763348 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.530485814102601 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07930513595166164 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43724999999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3345246010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/allura-org_Mistral-Small-24b-Sertraline-0304/34f35618-3ecf-4704-ab7a-ec9e8a5d08c1.json b/leaderboard_data/HFOpenLLMv2/mistral/allura-org_Mistral-Small-24b-Sertraline-0304/34f35618-3ecf-4704-ab7a-ec9e8a5d08c1.json deleted file mode 100644 index 581b8851c3d01902641e89ee4db5026b82b88559..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/allura-org_Mistral-Small-24b-Sertraline-0304/34f35618-3ecf-4704-ab7a-ec9e8a5d08c1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allura-org_Mistral-Small-24b-Sertraline-0304/1762652580.007422", - "retrieved_timestamp": "1762652580.007423", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allura-org/Mistral-Small-24b-Sertraline-0304", - "developer": "mistral", - "inference_platform": "unknown", - "id": "allura-org/Mistral-Small-24b-Sertraline-0304" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6799902037704402 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6524908933699552 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22280966767371602 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35151006711409394 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395104166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.5105551861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/allura-org_Mistral-Small-Sisyphus-24b-2503/ce2ee38f-cb48-403f-894d-f2824d00a388.json b/leaderboard_data/HFOpenLLMv2/mistral/allura-org_Mistral-Small-Sisyphus-24b-2503/ce2ee38f-cb48-403f-894d-f2824d00a388.json deleted file mode 100644 index 114e7c24dd70574d7a9f048858b88488c1ff5c65..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/allura-org_Mistral-Small-Sisyphus-24b-2503/ce2ee38f-cb48-403f-894d-f2824d00a388.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/allura-org_Mistral-Small-Sisyphus-24b-2503/1762652580.007755", - "retrieved_timestamp": "1762652580.007756", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "allura-org/Mistral-Small-Sisyphus-24b-2503", - "developer": "mistral", - "inference_platform": "unknown", - "id": "allura-org/Mistral-Small-Sisyphus-24b-2503" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6848362345243952 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6269790835863639 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39768749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5127160904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/amazon_MegaBeam-Mistral-7B-300k/4729a245-9e2d-4f65-bf14-67db4bb2590f.json b/leaderboard_data/HFOpenLLMv2/mistral/amazon_MegaBeam-Mistral-7B-300k/4729a245-9e2d-4f65-bf14-67db4bb2590f.json deleted file mode 100644 index 
ec66e232f776c265290b105d6e24617f7a6b6910..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/amazon_MegaBeam-Mistral-7B-300k/4729a245-9e2d-4f65-bf14-67db4bb2590f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/amazon_MegaBeam-Mistral-7B-300k/1762652580.010282", - "retrieved_timestamp": "1762652580.010283", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "amazon/MegaBeam-Mistral-7B-300k", - "developer": "mistral", - "inference_platform": "unknown", - "id": "amazon/MegaBeam-Mistral-7B-300k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520347123410329 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4227731731112974 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39799999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2549035904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/awnr_Mistral-7B-v0.1-signtensors-1-over-2/3bccbf0f-e578-426d-93bc-84364f7d8017.json b/leaderboard_data/HFOpenLLMv2/mistral/awnr_Mistral-7B-v0.1-signtensors-1-over-2/3bccbf0f-e578-426d-93bc-84364f7d8017.json deleted file mode 100644 index 5d9ae86881e331acccf6b74d7c0e3a71c5466ed7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/awnr_Mistral-7B-v0.1-signtensors-1-over-2/3bccbf0f-e578-426d-93bc-84364f7d8017.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-1-over-2/1762652580.020659", - "retrieved_timestamp": "1762652580.020659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "awnr/Mistral-7B-v0.1-signtensors-1-over-2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "awnr/Mistral-7B-v0.1-signtensors-1-over-2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21792178087474567 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4422884892437673 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40060416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2999501329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/awnr_Mistral-7B-v0.1-signtensors-1-over-4/ac1010e3-b3d8-4b61-ba79-0dcedb68619d.json b/leaderboard_data/HFOpenLLMv2/mistral/awnr_Mistral-7B-v0.1-signtensors-1-over-4/ac1010e3-b3d8-4b61-ba79-0dcedb68619d.json deleted file mode 100644 index 191109d273ab49323b6371f09bae06108c4e8fce..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/awnr_Mistral-7B-v0.1-signtensors-1-over-4/ac1010e3-b3d8-4b61-ba79-0dcedb68619d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-1-over-4/1762652580.0209029", - "retrieved_timestamp": "1762652580.0209038", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "awnr/Mistral-7B-v0.1-signtensors-1-over-4", - "developer": "mistral", - "inference_platform": "unknown", - "id": "awnr/Mistral-7B-v0.1-signtensors-1-over-4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2133007087860211 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35070947402846286 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2310505319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/awnr_Mistral-7B-v0.1-signtensors-3-over-8/12f4db59-10fe-47d0-86df-343ea8978249.json b/leaderboard_data/HFOpenLLMv2/mistral/awnr_Mistral-7B-v0.1-signtensors-3-over-8/12f4db59-10fe-47d0-86df-343ea8978249.json deleted file mode 100644 index 044ab23d25a02a28d98b3d003ab3b4660943ad77..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/awnr_Mistral-7B-v0.1-signtensors-3-over-8/12f4db59-10fe-47d0-86df-343ea8978249.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-3-over-8/1762652580.02111", - "retrieved_timestamp": "1762652580.021111", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "awnr/Mistral-7B-v0.1-signtensors-3-over-8", - "developer": "mistral", - "inference_platform": "unknown", - "id": "awnr/Mistral-7B-v0.1-signtensors-3-over-8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23942915907569692 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4299940969601492 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38175000000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30011635638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/awnr_Mistral-7B-v0.1-signtensors-5-over-16/b0ae93c7-b251-42df-a67f-ca8b8a865937.json b/leaderboard_data/HFOpenLLMv2/mistral/awnr_Mistral-7B-v0.1-signtensors-5-over-16/b0ae93c7-b251-42df-a67f-ca8b8a865937.json deleted file mode 100644 index 084acf68b92b92a7d9920c34b7a70a68ffc46aad..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/awnr_Mistral-7B-v0.1-signtensors-5-over-16/b0ae93c7-b251-42df-a67f-ca8b8a865937.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-5-over-16/1762652580.021311", - "retrieved_timestamp": "1762652580.021312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "awnr/Mistral-7B-v0.1-signtensors-5-over-16", - "developer": "mistral", - "inference_platform": "unknown", - "id": "awnr/Mistral-7B-v0.1-signtensors-5-over-16" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21182684166899385 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4124151161773006 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29579454787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/awnr_Mistral-7B-v0.1-signtensors-7-over-16/893da954-ca56-42ab-914d-44fbc4a6f1ff.json b/leaderboard_data/HFOpenLLMv2/mistral/awnr_Mistral-7B-v0.1-signtensors-7-over-16/893da954-ca56-42ab-914d-44fbc4a6f1ff.json deleted file mode 100644 index 62ea06ec228f2ab46fd17daa7708a9c5c08fed4f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/awnr_Mistral-7B-v0.1-signtensors-7-over-16/893da954-ca56-42ab-914d-44fbc4a6f1ff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/awnr_Mistral-7B-v0.1-signtensors-7-over-16/1762652580.0215192", - "retrieved_timestamp": "1762652580.02152", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "awnr/Mistral-7B-v0.1-signtensors-7-over-16", - "developer": "mistral", - "inference_platform": "unknown", - "id": "awnr/Mistral-7B-v0.1-signtensors-7-over-16" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22936253584932426 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43158208189876196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39520833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30302526595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - 
"params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/aws-prototyping_MegaBeam-Mistral-7B-512k/f05d6512-16ca-4f44-a31f-392f8f71da74.json b/leaderboard_data/HFOpenLLMv2/mistral/aws-prototyping_MegaBeam-Mistral-7B-512k/f05d6512-16ca-4f44-a31f-392f8f71da74.json deleted file mode 100644 index dc34bbeda263857f542617308fccb0a38a4e858c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/aws-prototyping_MegaBeam-Mistral-7B-512k/f05d6512-16ca-4f44-a31f-392f8f71da74.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/aws-prototyping_MegaBeam-Mistral-7B-512k/1762652580.0217311", - "retrieved_timestamp": "1762652580.0217311", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "aws-prototyping/MegaBeam-Mistral-7B-512k", - "developer": "mistral", - "inference_platform": "unknown", - "id": "aws-prototyping/MegaBeam-Mistral-7B-512k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5972586071623293 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3662336639946533 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3993645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25889295212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/axolotl-ai-co_romulus-mistral-nemo-12b-simpo/3f48c9eb-dbfa-4035-96a6-d4f516fa1e80.json b/leaderboard_data/HFOpenLLMv2/mistral/axolotl-ai-co_romulus-mistral-nemo-12b-simpo/3f48c9eb-dbfa-4035-96a6-d4f516fa1e80.json deleted file mode 100644 index 6f49afe699e4eb1d2e5a534ad8b4bcd68aac8952..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/mistral/axolotl-ai-co_romulus-mistral-nemo-12b-simpo/3f48c9eb-dbfa-4035-96a6-d4f516fa1e80.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/axolotl-ai-co_romulus-mistral-nemo-12b-simpo/1762652580.021987", - "retrieved_timestamp": "1762652580.0219882", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "axolotl-ai-co/romulus-mistral-nemo-12b-simpo", - "developer": "mistral", - "inference_platform": "unknown", - "id": "axolotl-ai-co/romulus-mistral-nemo-12b-simpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.607924750772395 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5395057669562011 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11404833836858005 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42330208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3469082446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/bamec66557_Mistral-Nemo-VICIOUS_MESH-12B-2407/9cd84a08-1f21-42ad-b8c0-eeb2df93ee29.json b/leaderboard_data/HFOpenLLMv2/mistral/bamec66557_Mistral-Nemo-VICIOUS_MESH-12B-2407/9cd84a08-1f21-42ad-b8c0-eeb2df93ee29.json deleted file mode 100644 index a278448fe7d0bb402d838c188d221149e3de62dc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/bamec66557_Mistral-Nemo-VICIOUS_MESH-12B-2407/9cd84a08-1f21-42ad-b8c0-eeb2df93ee29.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/bamec66557_Mistral-Nemo-VICIOUS_MESH-12B-2407/1762652580.026026", - "retrieved_timestamp": "1762652580.026027", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM 
v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407", - "developer": "mistral", - "inference_platform": "unknown", - "id": "bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6705729686121713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5155964285724085 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13670694864048338 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4309895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36768617021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/cckm_tinymistral_950m/d0dbcd95-252f-46e0-9699-81b293cb7db5.json b/leaderboard_data/HFOpenLLMv2/mistral/cckm_tinymistral_950m/d0dbcd95-252f-46e0-9699-81b293cb7db5.json deleted file mode 100644 index 5cda9878f94b9b365a174f34114d26ab8a3d7ce9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/cckm_tinymistral_950m/d0dbcd95-252f-46e0-9699-81b293cb7db5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/cckm_tinymistral_950m/1762652580.099487", - "retrieved_timestamp": "1762652580.099488", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "cckm/tinymistral_950m", - "developer": "mistral", - "inference_platform": "unknown", - "id": "cckm/tinymistral_950m" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23952889444451833 - } - 
}, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29694562621388126 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3553645833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10962433510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 0.955 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/chujiezheng_Mistral7B-PairRM-SPPO-ExPO/d7e88fea-5c3d-4b9c-85a9-a0cf35a97ea0.json b/leaderboard_data/HFOpenLLMv2/mistral/chujiezheng_Mistral7B-PairRM-SPPO-ExPO/d7e88fea-5c3d-4b9c-85a9-a0cf35a97ea0.json deleted file mode 100644 index f034a6d9c1bd5db299c0515c2c8e8c6d71a4f7f7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/chujiezheng_Mistral7B-PairRM-SPPO-ExPO/d7e88fea-5c3d-4b9c-85a9-a0cf35a97ea0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/chujiezheng_Mistral7B-PairRM-SPPO-ExPO/1762652580.101214", - "retrieved_timestamp": "1762652580.101215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "chujiezheng/Mistral7B-PairRM-SPPO-ExPO", - "developer": "mistral", - "inference_platform": "unknown", - "id": "chujiezheng/Mistral7B-PairRM-SPPO-ExPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36734863495525205 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882191262277366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40553124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2551529255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/flammenai_Mahou-1.2a-mistral-7B/d9804b0c-37db-492f-a1ba-851137e697f0.json b/leaderboard_data/HFOpenLLMv2/mistral/flammenai_Mahou-1.2a-mistral-7B/d9804b0c-37db-492f-a1ba-851137e697f0.json deleted file mode 100644 index 3a07d449f6a7802c3ff6be6727698f2344929a46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/flammenai_Mahou-1.2a-mistral-7B/d9804b0c-37db-492f-a1ba-851137e697f0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/flammenai_Mahou-1.2a-mistral-7B/1762652580.155141", - "retrieved_timestamp": "1762652580.155141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "flammenai/Mahou-1.2a-mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "flammenai/Mahou-1.2a-mistral-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4552010886669592 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5118111474458115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38962500000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31632313829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/flammenai_Mahou-1.5-mistral-nemo-12B/1c4e9e6a-7bb8-410f-9a3b-f88ea0ed474c.json b/leaderboard_data/HFOpenLLMv2/mistral/flammenai_Mahou-1.5-mistral-nemo-12B/1c4e9e6a-7bb8-410f-9a3b-f88ea0ed474c.json deleted file mode 100644 index 5d939aa5c20ec192737a3b17faa7a1d9775c3211..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/flammenai_Mahou-1.5-mistral-nemo-12B/1c4e9e6a-7bb8-410f-9a3b-f88ea0ed474c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/flammenai_Mahou-1.5-mistral-nemo-12B/1762652580.155725", - "retrieved_timestamp": "1762652580.1557262", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "flammenai/Mahou-1.5-mistral-nemo-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "flammenai/Mahou-1.5-mistral-nemo-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6751441730164851 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5522361927910235 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4520416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3602061170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/hotmailuser_Mistral-modelstock-24B/58269430-efba-4d04-a69e-8ef666f2afee.json 
b/leaderboard_data/HFOpenLLMv2/mistral/hotmailuser_Mistral-modelstock-24B/58269430-efba-4d04-a69e-8ef666f2afee.json deleted file mode 100644 index 5be9988a34157fa29275ff70a9720a5cecade49a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/hotmailuser_Mistral-modelstock-24B/58269430-efba-4d04-a69e-8ef666f2afee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_Mistral-modelstock-24B/1762652580.195392", - "retrieved_timestamp": "1762652580.195392", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/Mistral-modelstock-24B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "hotmailuser/Mistral-modelstock-24B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3424192254329623 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.645229041403176 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41023489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4590416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5069813829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/hotmailuser_Mistral-modelstock2-24B/7c9aa35b-3d8e-4b3f-8ae7-35698a1f1c70.json b/leaderboard_data/HFOpenLLMv2/mistral/hotmailuser_Mistral-modelstock2-24B/7c9aa35b-3d8e-4b3f-8ae7-35698a1f1c70.json deleted file mode 100644 index d526f05e53c88e0ad8f612e7289d7753963b17bb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/hotmailuser_Mistral-modelstock2-24B/7c9aa35b-3d8e-4b3f-8ae7-35698a1f1c70.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/hotmailuser_Mistral-modelstock2-24B/1762652580.195659", - "retrieved_timestamp": "1762652580.19566", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "hotmailuser/Mistral-modelstock2-24B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "hotmailuser/Mistral-modelstock2-24B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43184528163051816 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6689381929188762 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24018126888217523 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46161458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5318317819148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/irahulpandey_mistralai-7B-slerp-v0.1/034c23f5-6c03-4cee-b6b2-7263426cf975.json b/leaderboard_data/HFOpenLLMv2/mistral/irahulpandey_mistralai-7B-slerp-v0.1/034c23f5-6c03-4cee-b6b2-7263426cf975.json deleted file mode 100644 index 186634d980212e4fd6519dab471358c39d94a267..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/irahulpandey_mistralai-7B-slerp-v0.1/034c23f5-6c03-4cee-b6b2-7263426cf975.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/irahulpandey_mistralai-7B-slerp-v0.1/1762652580.23053", - "retrieved_timestamp": "1762652580.230531", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "irahulpandey/mistralai-7B-slerp-v0.1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "irahulpandey/mistralai-7B-slerp-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", 
- "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4966167546554254 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5010682924547378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45497916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2951296542553192 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/kaist-ai_mistral-orpo-capybara-7k/811cf797-62a1-4fda-960c-ee51f3e24a03.json b/leaderboard_data/HFOpenLLMv2/mistral/kaist-ai_mistral-orpo-capybara-7k/811cf797-62a1-4fda-960c-ee51f3e24a03.json deleted file mode 100644 index efbc093457c6d19b587ea3a006e9e0517889b54c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/kaist-ai_mistral-orpo-capybara-7k/811cf797-62a1-4fda-960c-ee51f3e24a03.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/kaist-ai_mistral-orpo-capybara-7k/1762652580.30416", - "retrieved_timestamp": "1762652580.304161", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kaist-ai/mistral-orpo-capybara-7k", - "developer": "mistral", - "inference_platform": "unknown", - "id": "kaist-ai/mistral-orpo-capybara-7k" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.536733644507684 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4488995185492166 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match 
on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3963541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.297124335106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/llmat_Mistral-v0.3-7B-ORPO/04a1b79b-a5af-420d-829b-0750341490cf.json b/leaderboard_data/HFOpenLLMv2/mistral/llmat_Mistral-v0.3-7B-ORPO/04a1b79b-a5af-420d-829b-0750341490cf.json deleted file mode 100644 index 17cecf0526addfc40c38f5e357dc01d972cee9d1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/llmat_Mistral-v0.3-7B-ORPO/04a1b79b-a5af-420d-829b-0750341490cf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/llmat_Mistral-v0.3-7B-ORPO/1762652580.325205", - "retrieved_timestamp": "1762652580.325206", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "llmat/Mistral-v0.3-7B-ORPO", - "developer": "mistral", - "inference_platform": "unknown", - "id": "llmat/Mistral-v0.3-7B-ORPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3639764713183243 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.400465557804411 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0015105740181268882 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3528541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23013630319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/llmat_Mistral-v0.3-7B-ORPO/ff710b55-0a89-4582-8caa-867efb88cf98.json b/leaderboard_data/HFOpenLLMv2/mistral/llmat_Mistral-v0.3-7B-ORPO/ff710b55-0a89-4582-8caa-867efb88cf98.json deleted file mode 100644 index 660f453658816120d6872d0a6faf80d90c2c6284..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/llmat_Mistral-v0.3-7B-ORPO/ff710b55-0a89-4582-8caa-867efb88cf98.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/llmat_Mistral-v0.3-7B-ORPO/1762652580.324949", - "retrieved_timestamp": "1762652580.324949", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "llmat/Mistral-v0.3-7B-ORPO", - "developer": "mistral", - "inference_platform": "unknown", - "id": "llmat/Mistral-v0.3-7B-ORPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3770406964631622 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39776607302918093 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35552083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2278091755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/mistral/migtissera_Tess-3-Mistral-Nemo-12B/7ef5c287-cf98-429f-80c3-d71743612a73.json b/leaderboard_data/HFOpenLLMv2/mistral/migtissera_Tess-3-Mistral-Nemo-12B/7ef5c287-cf98-429f-80c3-d71743612a73.json deleted file mode 100644 index 2549f80d8f6ab15aed1773b8f238429eaadb0ddc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/migtissera_Tess-3-Mistral-Nemo-12B/7ef5c287-cf98-429f-80c3-d71743612a73.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/migtissera_Tess-3-Mistral-Nemo-12B/1762652580.358769", - "retrieved_timestamp": "1762652580.35877", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "migtissera/Tess-3-Mistral-Nemo-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "migtissera/Tess-3-Mistral-Nemo-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.335499807178287 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.489942302453045 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44578125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25648271276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/mistral-community_Mistral-7B-v0.2/a65136c6-b3d7-4107-8d3a-0ce84b77965b.json b/leaderboard_data/HFOpenLLMv2/mistral/mistral-community_Mistral-7B-v0.2/a65136c6-b3d7-4107-8d3a-0ce84b77965b.json deleted file mode 100644 index 0c1052c48e95dfd7eb8b833a6703e184d192a18b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/mistral-community_Mistral-7B-v0.2/a65136c6-b3d7-4107-8d3a-0ce84b77965b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/mistral-community_Mistral-7B-v0.2/1762652580.360901", - "retrieved_timestamp": "1762652580.3609018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistral-community/Mistral-7B-v0.2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistral-community/Mistral-7B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22663976028050017 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4510187962797583 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4031770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2952958776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/mistral-community_Mixtral-8x22B-v0.1/810fc203-f10a-49ad-8a6f-58cbd70f2205.json b/leaderboard_data/HFOpenLLMv2/mistral/mistral-community_Mixtral-8x22B-v0.1/810fc203-f10a-49ad-8a6f-58cbd70f2205.json deleted file mode 100644 index f70230d3dec26310d4c665c3b123eedf6393a15c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/mistral-community_Mixtral-8x22B-v0.1/810fc203-f10a-49ad-8a6f-58cbd70f2205.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistral-community_Mixtral-8x22B-v0.1/1762652580.361141", - "retrieved_timestamp": "1762652580.361141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistral-community/Mixtral-8x22B-v0.1", - "developer": "mistral", - 
"inference_platform": "unknown", - "id": "mistral-community/Mixtral-8x22B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3166564417177914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38000000000000006 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15428571428571428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35333333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Unknown", - "params_billions": 0.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/mistral-community_mixtral-8x22B-v0.3/abeddace-67d6-484a-b410-95d92819dfe5.json b/leaderboard_data/HFOpenLLMv2/mistral/mistral-community_mixtral-8x22B-v0.3/abeddace-67d6-484a-b410-95d92819dfe5.json deleted file mode 100644 index 389b167f7d1c5e2cea06079554b94e6a875fd14c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/mistral-community_mixtral-8x22B-v0.3/abeddace-67d6-484a-b410-95d92819dfe5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistral-community_mixtral-8x22B-v0.3/1762652580.361342", - "retrieved_timestamp": "1762652580.361343", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistral-community/mixtral-8x22B-v0.3", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistral-community/mixtral-8x22B-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25826362939223485 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.6250002178435845 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18353474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3775167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4036979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46392952127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 140.63 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Codestral-22B-v0.1/b6fa1ae6-3df8-437d-a844-3fa022c12370.json b/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Codestral-22B-v0.1/b6fa1ae6-3df8-437d-a844-3fa022c12370.json deleted file mode 100644 index 8ef2c1deb580e7e3b062f5df96327fdef472d1c5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Codestral-22B-v0.1/b6fa1ae6-3df8-437d-a844-3fa022c12370.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Codestral-22B-v0.1/1762652580.361543", - "retrieved_timestamp": "1762652580.361544", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Codestral-22B-v0.1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistralai/Codestral-22B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5771752283939946 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5139136921003167 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10045317220543806 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3155751329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mistral-7B-v0.1/44381c62-a310-4f01-bd66-9d1434638cf4.json b/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mistral-7B-v0.1/44381c62-a310-4f01-bd66-9d1434638cf4.json deleted file mode 100644 index 14e5c750f2fae0c86f5389469028795f8670b028..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mistral-7B-v0.1/44381c62-a310-4f01-bd66-9d1434638cf4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-v0.1/1762652580.362653", - "retrieved_timestamp": "1762652580.362654", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mistral-7B-v0.1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistralai/Mistral-7B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2385548123423627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4419401145517045 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4139375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30127992021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at 
end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mistral-7B-v0.3/1a3acc9e-b2cd-4f80-8fcc-b227eee29f26.json b/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mistral-7B-v0.3/1a3acc9e-b2cd-4f80-8fcc-b227eee29f26.json deleted file mode 100644 index fbb368675b37ce70882d635dd70abdb2589f7641..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mistral-7B-v0.3/1a3acc9e-b2cd-4f80-8fcc-b227eee29f26.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-v0.3/1762652580.362854", - "retrieved_timestamp": "1762652580.362854", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mistral-7B-v0.3", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistralai/Mistral-7B-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22663976028050017 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45168546294642503 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4031770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2952958776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mistral-Nemo-Base-2407/51b35f7f-f6f7-44ca-9816-b3d812112131.json b/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mistral-Nemo-Base-2407/51b35f7f-f6f7-44ca-9816-b3d812112131.json deleted file mode 100644 index 00ffe181adb3ecef7ce9fcee4a825106c2c10cee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mistral-Nemo-Base-2407/51b35f7f-f6f7-44ca-9816-b3d812112131.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/mistralai_Mistral-Nemo-Base-2407/1762652580.363275", - "retrieved_timestamp": "1762652580.363276", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mistral-Nemo-Base-2407", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistralai/Mistral-Nemo-Base-2407" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16299197241098062 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5035062000369291 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3921354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34715757978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 11.58 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mistral-Small-24B-Base-2501/6b30f50f-9a89-4a11-bcf9-4f38c46c1f18.json b/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mistral-Small-24B-Base-2501/6b30f50f-9a89-4a11-bcf9-4f38c46c1f18.json deleted file mode 100644 index 01140ebe02fb9b4b0e90076e163339d75b05faf3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mistral-Small-24B-Base-2501/6b30f50f-9a89-4a11-bcf9-4f38c46c1f18.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Small-24B-Base-2501/1762652580.363713", - "retrieved_timestamp": "1762652580.363714", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mistral-Small-24B-Base-2501", - "developer": "mistral", - 
"inference_platform": "unknown", - "id": "mistralai/Mistral-Small-24B-Base-2501" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16723848278124265 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6441860347172437 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1971299093655589 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42366666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5406416223404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mixtral-8x22B-v0.1/b08cfbfa-906a-4dd0-b258-a7a56a6dcda4.json b/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mixtral-8x22B-v0.1/b08cfbfa-906a-4dd0-b258-a7a56a6dcda4.json deleted file mode 100644 index 10352b777bd463fbe32e6c6716326099b576483d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mixtral-8x22B-v0.1/b08cfbfa-906a-4dd0-b258-a7a56a6dcda4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x22B-v0.1/1762652580.364491", - "retrieved_timestamp": "1762652580.364492", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mixtral-8x22B-v0.1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistralai/Mixtral-8x22B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25826362939223485 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.6239807473187268 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18353474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4036979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46392952127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 140.621 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mixtral-8x7B-v0.1/4384c278-c869-4591-84fd-a8b2843fe42d.json b/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mixtral-8x7B-v0.1/4384c278-c869-4591-84fd-a8b2843fe42d.json deleted file mode 100644 index 5d812e45fed6b54cd621c8e8233c7e0e350c3bca..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mixtral-8x7B-v0.1/4384c278-c869-4591-84fd-a8b2843fe42d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x7B-v0.1/1762652580.3651662", - "retrieved_timestamp": "1762652580.3651662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mixtral-8x7B-v0.1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistralai/Mixtral-8x7B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23260947618984296 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097711377553386 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32046979865771813 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4413125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3871343085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mixtral-8x7B-v0.1/f1822f64-0594-4f16-98f4-29932c604187.json b/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mixtral-8x7B-v0.1/f1822f64-0594-4f16-98f4-29932c604187.json deleted file mode 100644 index 5c64cd96d666cdac7eaa59c279e80f29715f9f59..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/mistralai_Mixtral-8x7B-v0.1/f1822f64-0594-4f16-98f4-29932c604187.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x7B-v0.1/1762652580.364961", - "retrieved_timestamp": "1762652580.364962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mixtral-8x7B-v0.1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "mistralai/Mixtral-8x7B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24152692633324024 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.508666743762444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43213541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3849734042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } -} \ No 
newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nazimali_Mistral-Nemo-Kurdish-Instruct/27e58a27-f4e9-4c7a-93f2-c3b15cab8f9f.json b/leaderboard_data/HFOpenLLMv2/mistral/nazimali_Mistral-Nemo-Kurdish-Instruct/27e58a27-f4e9-4c7a-93f2-c3b15cab8f9f.json deleted file mode 100644 index 9b585241778e8ca40292d0a838b5bce295ee3bcc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nazimali_Mistral-Nemo-Kurdish-Instruct/27e58a27-f4e9-4c7a-93f2-c3b15cab8f9f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nazimali_Mistral-Nemo-Kurdish-Instruct/1762652580.376322", - "retrieved_timestamp": "1762652580.376323", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nazimali/Mistral-Nemo-Kurdish-Instruct", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nazimali/Mistral-Nemo-Kurdish-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4860004787297703 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47214400722999256 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40057291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30867686170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nazimali_Mistral-Nemo-Kurdish-Instruct/3381e897-35f3-45f4-ac05-3ca47441b772.json b/leaderboard_data/HFOpenLLMv2/mistral/nazimali_Mistral-Nemo-Kurdish-Instruct/3381e897-35f3-45f4-ac05-3ca47441b772.json deleted file mode 100644 index 1e0cbd5572dbd66bb714941a1c321b3cd6f39a00..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nazimali_Mistral-Nemo-Kurdish-Instruct/3381e897-35f3-45f4-ac05-3ca47441b772.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nazimali_Mistral-Nemo-Kurdish-Instruct/1762652580.376105", - "retrieved_timestamp": "1762652580.376106", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nazimali/Mistral-Nemo-Kurdish-Instruct", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nazimali/Mistral-Nemo-Kurdish-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4963917959901949 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4699417600389813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.397875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062666223404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nazimali_Mistral-Nemo-Kurdish/0da50308-a631-4466-b2e4-2793412b31db.json b/leaderboard_data/HFOpenLLMv2/mistral/nazimali_Mistral-Nemo-Kurdish/0da50308-a631-4466-b2e4-2793412b31db.json deleted file mode 100644 index 4a448af8cd2d4c5320b0d17371dea1cce0078d80..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nazimali_Mistral-Nemo-Kurdish/0da50308-a631-4466-b2e4-2793412b31db.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nazimali_Mistral-Nemo-Kurdish/1762652580.375733", - "retrieved_timestamp": "1762652580.3757372", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nazimali/Mistral-Nemo-Kurdish", - "developer": "mistral", 
- "inference_platform": "unknown", - "id": "nazimali/Mistral-Nemo-Kurdish" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3401208792670115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5133321102266589 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09592145015105741 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4115729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3234707446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_BigKartoffel-mistral-nemo-20B/95ba0175-5578-47fe-aec9-93ccf4f9f9af.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_BigKartoffel-mistral-nemo-20B/95ba0175-5578-47fe-aec9-93ccf4f9f9af.json deleted file mode 100644 index f637629baeac469c7e4de7f859d2c8f46510e2f2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_BigKartoffel-mistral-nemo-20B/95ba0175-5578-47fe-aec9-93ccf4f9f9af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_BigKartoffel-mistral-nemo-20B/1762652580.376553", - "retrieved_timestamp": "1762652580.376553", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/BigKartoffel-mistral-nemo-20B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/BigKartoffel-mistral-nemo-20B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5857181168189294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.55148305168682 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42804166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3529753989361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 20.427 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_DoppelKartoffel-Mistral-Nemo-23B/5db2ec95-d423-4987-aaa7-b5919d1a2cc8.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_DoppelKartoffel-Mistral-Nemo-23B/5db2ec95-d423-4987-aaa7-b5919d1a2cc8.json deleted file mode 100644 index 7b0f9ef788d41732eb490a15bda4c95ddef2c83d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_DoppelKartoffel-Mistral-Nemo-23B/5db2ec95-d423-4987-aaa7-b5919d1a2cc8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_DoppelKartoffel-Mistral-Nemo-23B/1762652580.376802", - "retrieved_timestamp": "1762652580.3768032", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/DoppelKartoffel-Mistral-Nemo-23B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/DoppelKartoffel-Mistral-Nemo-23B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5191480826429429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5217926041279988 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3080119680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.153 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_DoublePotato-Mistral-Nemo-13B/03b30ba7-efc3-467e-bdde-c6a18437929b.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_DoublePotato-Mistral-Nemo-13B/03b30ba7-efc3-467e-bdde-c6a18437929b.json deleted file mode 100644 index 986069db9c7ef84d0a5330de7817e601fa4fe24c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_DoublePotato-Mistral-Nemo-13B/03b30ba7-efc3-467e-bdde-c6a18437929b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_DoublePotato-Mistral-Nemo-13B/1762652580.377009", - "retrieved_timestamp": "1762652580.3770099", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/DoublePotato-Mistral-Nemo-13B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/DoublePotato-Mistral-Nemo-13B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6796156420519777 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5437915398770364 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45997916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.359624335106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 13.338 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Flammades-Mistral-Nemo-12B/a6e65aeb-f0d3-48ca-8f6e-933d0ea2113b.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Flammades-Mistral-Nemo-12B/a6e65aeb-f0d3-48ca-8f6e-933d0ea2113b.json deleted file mode 100644 index 9ee10741639280d58650a6b7b96b047e7d30c39a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Flammades-Mistral-Nemo-12B/a6e65aeb-f0d3-48ca-8f6e-933d0ea2113b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Flammades-Mistral-Nemo-12B/1762652580.3785129", - "retrieved_timestamp": "1762652580.3785138", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Flammades-Mistral-Nemo-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Flammades-Mistral-Nemo-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38415958545548035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5299609345270283 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.480625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36610704787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Gutensuppe-mistral-nemo-12B/80a9277b-5768-4da0-96c6-3289a7b8a9bc.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Gutensuppe-mistral-nemo-12B/80a9277b-5768-4da0-96c6-3289a7b8a9bc.json deleted file mode 100644 index 
e91337d8bba5e472d32833efb2543759a14b0b76..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Gutensuppe-mistral-nemo-12B/80a9277b-5768-4da0-96c6-3289a7b8a9bc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Gutensuppe-mistral-nemo-12B/1762652580.378963", - "retrieved_timestamp": "1762652580.378964", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Gutensuppe-mistral-nemo-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Gutensuppe-mistral-nemo-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29161070404305023 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5486832203098263 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42903125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3680186170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Hermes2-Gutenberg2-Mistral-7B/b9b08e55-0c5d-427d-914b-e4cfb4de96b8.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Hermes2-Gutenberg2-Mistral-7B/b9b08e55-0c5d-427d-914b-e4cfb4de96b8.json deleted file mode 100644 index 7d8c9c761e41801ca003c3e9e0652f77f81f3d5f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Hermes2-Gutenberg2-Mistral-7B/b9b08e55-0c5d-427d-914b-e4cfb4de96b8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Hermes2-Gutenberg2-Mistral-7B/1762652580.379175", - "retrieved_timestamp": "1762652580.379176", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Hermes2-Gutenberg2-Mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Hermes2-Gutenberg2-Mistral-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37214479802479644 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4981450458280896 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46230208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29928523936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Lyra-Gutenberg-mistral-nemo-12B/5b3de7db-009e-46c9-bf34-fe5912c39b81.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Lyra-Gutenberg-mistral-nemo-12B/5b3de7db-009e-46c9-bf34-fe5912c39b81.json deleted file mode 100644 index 194fd0d2bd8eaf8c34c1f847fcbe24a7ff9baa7e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Lyra-Gutenberg-mistral-nemo-12B/5b3de7db-009e-46c9-bf34-fe5912c39b81.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Lyra-Gutenberg-mistral-nemo-12B/1762652580.3801112", - "retrieved_timestamp": "1762652580.380112", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Lyra-Gutenberg-mistral-nemo-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Lyra-Gutenberg-mistral-nemo-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34948824674086976 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5586245741555749 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338926174496644 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43566666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36278257978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mahou-1.5-mistral-nemo-12B-lorablated/0cee26b2-c3b3-40be-bc15-3fdaf7b4b38c.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mahou-1.5-mistral-nemo-12B-lorablated/0cee26b2-c3b3-40be-bc15-3fdaf7b4b38c.json deleted file mode 100644 index 2dd09d1d51c2ce6f046f304e7f04cdb797c7eaa0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mahou-1.5-mistral-nemo-12B-lorablated/0cee26b2-c3b3-40be-bc15-3fdaf7b4b38c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mahou-1.5-mistral-nemo-12B-lorablated/1762652580.380727", - "retrieved_timestamp": "1762652580.380728", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6824880206740338 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5496040380079439 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45216666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35738031914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Gutenberg-Doppel-7B-FFT/c3eae55f-ce07-4ea2-b9d4-92e0909a8b06.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Gutenberg-Doppel-7B-FFT/c3eae55f-ce07-4ea2-b9d4-92e0909a8b06.json deleted file mode 100644 index cf3fab77b30d8c21467e41ce48b51005bdff3456..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Gutenberg-Doppel-7B-FFT/c3eae55f-ce07-4ea2-b9d4-92e0909a8b06.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Gutenberg-Doppel-7B-FFT/1762652580.380932", - "retrieved_timestamp": "1762652580.380933", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5716798095719358 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40762540890255944 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": 
{ - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4059375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2728557180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Nemo-Gutenberg-Doppel-12B-v2/178418ad-2d0a-40cd-a057-105bbe69f937.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Nemo-Gutenberg-Doppel-12B-v2/178418ad-2d0a-40cd-a057-105bbe69f937.json deleted file mode 100644 index 19af3b047079e8e2323f4b4d0fd1f2a257ca12b8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Nemo-Gutenberg-Doppel-12B-v2/178418ad-2d0a-40cd-a057-105bbe69f937.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Gutenberg-Doppel-12B-v2/1762652580.3813472", - "retrieved_timestamp": "1762652580.3813481", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6535869271311232 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5374496172235809 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42330208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3546376329787234 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Nemo-Gutenberg-Doppel-12B/012b188f-db69-4529-bfe3-db34c77e7dc0.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Nemo-Gutenberg-Doppel-12B/012b188f-db69-4529-bfe3-db34c77e7dc0.json deleted file mode 100644 index 58a1c1c1ec063882ae43d8b807fcd4a86cb569af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Nemo-Gutenberg-Doppel-12B/012b188f-db69-4529-bfe3-db34c77e7dc0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Gutenberg-Doppel-12B/1762652580.381143", - "retrieved_timestamp": "1762652580.381144", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567068711020093 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5274606999473499 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41321874999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35787898936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Nemo-Prism-12B-v2/e5582319-d8e6-4223-97bb-a64a2cc03853.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Nemo-Prism-12B-v2/e5582319-d8e6-4223-97bb-a64a2cc03853.json deleted file mode 100644 index 8c1a7d256732390b5987b5846030e321177e6da0..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Nemo-Prism-12B-v2/e5582319-d8e6-4223-97bb-a64a2cc03853.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Prism-12B-v2/1762652580.3824818", - "retrieved_timestamp": "1762652580.382483", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Mistral-Nemo-Prism-12B-v2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Nemo-Prism-12B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6974006746543615 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5491875637377679 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45997916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3567154255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Nemo-Prism-12B-v7/d66604f0-15b3-4ac3-b0e9-083ab6906da0.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Nemo-Prism-12B-v7/d66604f0-15b3-4ac3-b0e9-083ab6906da0.json deleted file mode 100644 index 16a2168b3833ab6e3dbf160d0a49835a9fa0f790..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Nemo-Prism-12B-v7/d66604f0-15b3-4ac3-b0e9-083ab6906da0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Prism-12B-v7/1762652580.382694", - "retrieved_timestamp": "1762652580.382695", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Mistral-Nemo-Prism-12B-v7", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Nemo-Prism-12B-v7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6961517662025647 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5521104600038905 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46388541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35904255319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Nemo-Prism-12B/5ea20d83-ceee-4c52-911a-e25e9cfecf0e.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Nemo-Prism-12B/5ea20d83-ceee-4c52-911a-e25e9cfecf0e.json deleted file mode 100644 index 76f532d5f916a9d3335fc21349e1ed4a8cf9b795..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Nemo-Prism-12B/5ea20d83-ceee-4c52-911a-e25e9cfecf0e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Prism-12B/1762652580.382256", - "retrieved_timestamp": "1762652580.382257", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Mistral-Nemo-Prism-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Nemo-Prism-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6858103166265509 - } - 
}, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5475186352291487 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46261458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3581283244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Small-Drummer-22B/2e86d526-de04-4339-8495-e88c5a9f3f18.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Small-Drummer-22B/2e86d526-de04-4339-8495-e88c5a9f3f18.json deleted file mode 100644 index e6b3f8b8c9778cfb6d57dee9cc7eb022a2590d56..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Small-Drummer-22B/2e86d526-de04-4339-8495-e88c5a9f3f18.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Small-Drummer-22B/1762652580.3829079", - "retrieved_timestamp": "1762652580.3829088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Mistral-Small-Drummer-22B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Small-Drummer-22B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6331289866443259 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5793201948136216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40636458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40949135638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Small-Gutenberg-Doppel-22B/99cfc94d-3cde-4e42-924a-5c4a4c7f217a.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Small-Gutenberg-Doppel-22B/99cfc94d-3cde-4e42-924a-5c4a4c7f217a.json deleted file mode 100644 index d0186f18f14e46bf78eccff52ab12a1095016c31..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Mistral-Small-Gutenberg-Doppel-22B/99cfc94d-3cde-4e42-924a-5c4a4c7f217a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Small-Gutenberg-Doppel-22B/1762652580.383116", - "retrieved_timestamp": "1762652580.383116", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Mistral-Small-Gutenberg-Doppel-22B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Small-Gutenberg-Doppel-22B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48932277468228746 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5858932329112819 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39706250000000004 - } 
- }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41240026595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Stella-mistral-nemo-12B-v2/ed825fd6-f749-449f-a1d6-c3ad7a82e354.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Stella-mistral-nemo-12B-v2/ed825fd6-f749-449f-a1d6-c3ad7a82e354.json deleted file mode 100644 index b100c5d48e3d777289669b98512daa0490807de4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_Stella-mistral-nemo-12B-v2/ed825fd6-f749-449f-a1d6-c3ad7a82e354.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Stella-mistral-nemo-12B-v2/1762652580.384186", - "retrieved_timestamp": "1762652580.384186", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Stella-mistral-nemo-12B-v2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/Stella-mistral-nemo-12B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32743121584063617 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5483750956495209 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4303958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684341755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-bophades-12B/1cb58f83-841d-474a-9c7b-adece8cab805.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-bophades-12B/1cb58f83-841d-474a-9c7b-adece8cab805.json deleted file mode 100644 index 7435e83fe43964cd2280df55f7c936c0000a9d38..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-bophades-12B/1cb58f83-841d-474a-9c7b-adece8cab805.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-bophades-12B/1762652580.385997", - "retrieved_timestamp": "1762652580.385998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-bophades-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-bophades-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6794405510711579 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4988471515853883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12311178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41778125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35006648936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-bophades3-12B/2043110d-2b63-4133-9c53-b39b5b7869b6.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-bophades3-12B/2043110d-2b63-4133-9c53-b39b5b7869b6.json deleted file mode 100644 index cf69675cd4c5e76cdc6f4a528ed8f3b7f03852a4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-bophades3-12B/2043110d-2b63-4133-9c53-b39b5b7869b6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/nbeerbower_mistral-nemo-bophades3-12B/1762652580.386282", - "retrieved_timestamp": "1762652580.386283", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-bophades3-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-bophades3-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6577835698169745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.544933208169299 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4604479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3371010638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-cc-12B/45e38c7d-5f31-404b-8fcc-9f3cad239cd1.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-cc-12B/45e38c7d-5f31-404b-8fcc-9f3cad239cd1.json deleted file mode 100644 index 1bbdb4318500ffd1a758da3fd2920185aa5f0b70..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-cc-12B/45e38c7d-5f31-404b-8fcc-9f3cad239cd1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-cc-12B/1762652580.386496", - "retrieved_timestamp": "1762652580.386497", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-cc-12B", - "developer": "mistral", - "inference_platform": "unknown", - 
"id": "nbeerbower/mistral-nemo-cc-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14353249378316202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399409546487519 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44236458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3597905585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutades-12B/b83d5033-b513-4472-84c1-1b757c533137.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutades-12B/b83d5033-b513-4472-84c1-1b757c533137.json deleted file mode 100644 index e75800965f1ddc039f6a6cc4ae214ddc460b95fc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutades-12B/b83d5033-b513-4472-84c1-1b757c533137.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutades-12B/1762652580.3867059", - "retrieved_timestamp": "1762652580.3867059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-gutades-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-gutades-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3425189608017837 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.5407194259684368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3560505319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutenberg-12B-v2/db2dee58-3a9c-4789-800d-ed7207c6699c.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutenberg-12B-v2/db2dee58-3a9c-4789-800d-ed7207c6699c.json deleted file mode 100644 index c0fd12a006d3791b47747d58a8dd80c571ce7a6e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutenberg-12B-v2/db2dee58-3a9c-4789-800d-ed7207c6699c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B-v2/1762652580.38711", - "retrieved_timestamp": "1762652580.387111", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-gutenberg-12B-v2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-gutenberg-12B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6203395878491292 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5397203788283472 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34990026595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutenberg-12B-v3/b4ed9f85-c1bb-4a52-8ba6-69f4e0f8e442.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutenberg-12B-v3/b4ed9f85-c1bb-4a52-8ba6-69f4e0f8e442.json deleted file mode 100644 index 327f7b3762a1c60bcc6c9f9eb539a5c1b7b74b7f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutenberg-12B-v3/b4ed9f85-c1bb-4a52-8ba6-69f4e0f8e442.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B-v3/1762652580.387317", - "retrieved_timestamp": "1762652580.3873181", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-gutenberg-12B-v3", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-gutenberg-12B-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827085466562057 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.544065799051091 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44503125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3644448138297872 - 
} - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutenberg-12B-v4/9f84023e-a23c-4d2c-afb3-f93629f97a6f.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutenberg-12B-v4/9f84023e-a23c-4d2c-afb3-f93629f97a6f.json deleted file mode 100644 index b56ed44f32634aeeb6577dcfeb8914e9e3e242d8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutenberg-12B-v4/9f84023e-a23c-4d2c-afb3-f93629f97a6f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B-v4/1762652580.3875241", - "retrieved_timestamp": "1762652580.387525", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-gutenberg-12B-v4", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-gutenberg-12B-v4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.237929804031082 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5269028864823667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12613293051359517 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4104270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3575465425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutenberg-12B/9f8c4246-9770-4790-8db0-095e722d89e9.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutenberg-12B/9f8c4246-9770-4790-8db0-095e722d89e9.json deleted file mode 100644 index c3739fd8d3c0a53a95934c6a2a027410233e4551..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutenberg-12B/9f8c4246-9770-4790-8db0-095e722d89e9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg-12B/1762652580.3869052", - "retrieved_timestamp": "1762652580.3869061", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-gutenberg-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-gutenberg-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.350386973231027 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5281363707697807 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41706250000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3562167553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutenberg2-12B-test/10a4d2dc-4779-4b0f-92fa-010a6a51fe9f.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutenberg2-12B-test/10a4d2dc-4779-4b0f-92fa-010a6a51fe9f.json deleted file mode 100644 index 878eed44fffa536215dfccbbf5786cb50a158261..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-gutenberg2-12B-test/10a4d2dc-4779-4b0f-92fa-010a6a51fe9f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-gutenberg2-12B-test/1762652580.387729", - "retrieved_timestamp": "1762652580.38773", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-gutenberg2-12B-test", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-gutenberg2-12B-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33847192116916447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.525477908630255 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4157291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35546875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-kartoffel-12B/b111507d-92e8-4af1-882a-9434d6825f51.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-kartoffel-12B/b111507d-92e8-4af1-882a-9434d6825f51.json deleted file mode 100644 index 41c5ea155247c284c636f39575b40ae38d27efc1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-kartoffel-12B/b111507d-92e8-4af1-882a-9434d6825f51.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-kartoffel-12B/1762652580.3880079", - "retrieved_timestamp": "1762652580.3880079", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-kartoffel-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-kartoffel-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.7031709198260616 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5483796436144805 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46528125000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35846077127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-narwhal-12B/e1bd9218-4bfb-4df1-a2bf-4a10937240dc.json b/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-narwhal-12B/e1bd9218-4bfb-4df1-a2bf-4a10937240dc.json deleted file mode 100644 index 7291c03b1e787267341b213944b60ba16620bb98..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nbeerbower_mistral-nemo-narwhal-12B/e1bd9218-4bfb-4df1-a2bf-4a10937240dc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-narwhal-12B/1762652580.388214", - "retrieved_timestamp": "1762652580.388215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-narwhal-12B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-narwhal-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5549187267561182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5057374929934754 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38469791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34832114361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nlpguy_Mistral-NeMo-Minitron-Upscale-v1/97b61e29-2157-4167-b5bd-94919ecdcacc.json b/leaderboard_data/HFOpenLLMv2/mistral/nlpguy_Mistral-NeMo-Minitron-Upscale-v1/97b61e29-2157-4167-b5bd-94919ecdcacc.json deleted file mode 100644 index 7e5035282e2d27dabc72e8c1fee8925615f50901..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nlpguy_Mistral-NeMo-Minitron-Upscale-v1/97b61e29-2157-4167-b5bd-94919ecdcacc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nlpguy_Mistral-NeMo-Minitron-Upscale-v1/1762652580.4083898", - "retrieved_timestamp": "1762652580.408391", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nlpguy/Mistral-NeMo-Minitron-Upscale-v1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16484040124647048 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44679984097967057 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3803541666666667 - } 
- }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2537400265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.451 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nlpguy_Mistral-NeMo-Minitron-Upscale-v2/9cee29c1-b8dc-4a2c-b117-d5912b890824.json b/leaderboard_data/HFOpenLLMv2/mistral/nlpguy_Mistral-NeMo-Minitron-Upscale-v2/9cee29c1-b8dc-4a2c-b117-d5912b890824.json deleted file mode 100644 index a8fc22a27a03e01085850e02a97522bb56b230ea..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nlpguy_Mistral-NeMo-Minitron-Upscale-v2/9cee29c1-b8dc-4a2c-b117-d5912b890824.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nlpguy_Mistral-NeMo-Minitron-Upscale-v2/1762652580.4086552", - "retrieved_timestamp": "1762652580.408656", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nlpguy/Mistral-NeMo-Minitron-Upscale-v2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15727159492369136 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3949668154807224 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3790833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1926529255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.451 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/mistral/nlpguy_Mistral-NeMo-Minitron-Upscale-v3/7d2d135a-ab81-49fa-8c17-07f9bd54399d.json b/leaderboard_data/HFOpenLLMv2/mistral/nlpguy_Mistral-NeMo-Minitron-Upscale-v3/7d2d135a-ab81-49fa-8c17-07f9bd54399d.json deleted file mode 100644 index 0104d795c77beea8ef4c00a8b30e056ebf2e23ff..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nlpguy_Mistral-NeMo-Minitron-Upscale-v3/7d2d135a-ab81-49fa-8c17-07f9bd54399d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nlpguy_Mistral-NeMo-Minitron-Upscale-v3/1762652580.408863", - "retrieved_timestamp": "1762652580.408864", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nlpguy/Mistral-NeMo-Minitron-Upscale-v3", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14120976786038822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30524522602918064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40984375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11710438829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.451 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/nvidia_Mistral-NeMo-Minitron-8B-Base/7bbc4787-9899-4d90-90c6-dec88bc7dd52.json b/leaderboard_data/HFOpenLLMv2/mistral/nvidia_Mistral-NeMo-Minitron-8B-Base/7bbc4787-9899-4d90-90c6-dec88bc7dd52.json deleted file mode 100644 index fd96f7c824af730c0745624691a507a5ac15aabd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/nvidia_Mistral-NeMo-Minitron-8B-Base/7bbc4787-9899-4d90-90c6-dec88bc7dd52.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/nvidia_Mistral-NeMo-Minitron-8B-Base/1762652580.415714", - "retrieved_timestamp": "1762652580.415715", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/Mistral-NeMo-Minitron-8B-Base", - "developer": "mistral", - "inference_platform": "unknown", - "id": "nvidia/Mistral-NeMo-Minitron-8B-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19456597383830457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5219098090521418 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32550335570469796 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40915625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37957114361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.88 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/pszemraj_Mistral-v0.3-6B/729b4f81-32da-41d2-8fa4-d18553b37b83.json b/leaderboard_data/HFOpenLLMv2/mistral/pszemraj_Mistral-v0.3-6B/729b4f81-32da-41d2-8fa4-d18553b37b83.json deleted file mode 100644 index c0da68ddaf420fbadf7e91e9152354290e4c7680..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/pszemraj_Mistral-v0.3-6B/729b4f81-32da-41d2-8fa4-d18553b37b83.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pszemraj_Mistral-v0.3-6B/1762652580.481565", - "retrieved_timestamp": "1762652580.481566", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pszemraj/Mistral-v0.3-6B", - "developer": "mistral", - "inference_platform": "unknown", - "id": 
"pszemraj/Mistral-v0.3-6B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2453744952282167 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3774050646438491 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39077083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2142619680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 5.939 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/shivam9980_mistral-7b-news-cnn-merged/ce626634-c5a4-422d-8b03-1a28108809ce.json b/leaderboard_data/HFOpenLLMv2/mistral/shivam9980_mistral-7b-news-cnn-merged/ce626634-c5a4-422d-8b03-1a28108809ce.json deleted file mode 100644 index daeaee03cf96a700a072bdc771849c9e24b1274d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/shivam9980_mistral-7b-news-cnn-merged/ce626634-c5a4-422d-8b03-1a28108809ce.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/shivam9980_mistral-7b-news-cnn-merged/1762652580.515563", - "retrieved_timestamp": "1762652580.515563", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "shivam9980/mistral-7b-news-cnn-merged", - "developer": "mistral", - "inference_platform": "unknown", - "id": "shivam9980/mistral-7b-news-cnn-merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4634192830578421 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3635484854246454 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45226041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28274601063829785 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 7.723 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/shivank21_mistral_dpo_self/7b07e583-36df-47df-8439-224eca2e5761.json b/leaderboard_data/HFOpenLLMv2/mistral/shivank21_mistral_dpo_self/7b07e583-36df-47df-8439-224eca2e5761.json deleted file mode 100644 index fd3a22ff5b0cdc711cc0f4de8f5c4f591804cad1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/shivank21_mistral_dpo_self/7b07e583-36df-47df-8439-224eca2e5761.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/shivank21_mistral_dpo_self/1762652580.5158348", - "retrieved_timestamp": "1762652580.515836", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "shivank21/mistral_dpo_self", - "developer": "mistral", - "inference_platform": "unknown", - "id": "shivank21/mistral_dpo_self" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.340345837932242 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3216256961597798 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2214095744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "", - "params_billions": 7.913 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/siqi00_Mistral-7B-DFT/e2f4255d-12ff-4c88-996d-bac6b51aaa33.json b/leaderboard_data/HFOpenLLMv2/mistral/siqi00_Mistral-7B-DFT/e2f4255d-12ff-4c88-996d-bac6b51aaa33.json deleted file mode 100644 index c850a09550d03830c48dfdecef637d78415d12dc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/siqi00_Mistral-7B-DFT/e2f4255d-12ff-4c88-996d-bac6b51aaa33.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/siqi00_Mistral-7B-DFT/1762652580.5171149", - "retrieved_timestamp": "1762652580.5171149", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "siqi00/Mistral-7B-DFT", - "developer": "mistral", - "inference_platform": "unknown", - "id": "siqi00/Mistral-7B-DFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5568668909604294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46648773367771273 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0377643504531722 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41911458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2962932180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/mistral/siqi00_Mistral-7B-DFT2/dae2a1a6-a608-4b64-a77a-e4aed87e7d7f.json b/leaderboard_data/HFOpenLLMv2/mistral/siqi00_Mistral-7B-DFT2/dae2a1a6-a608-4b64-a77a-e4aed87e7d7f.json deleted file mode 100644 index aff1c6591af1a3e6b507cc299ca488592ed43025..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/siqi00_Mistral-7B-DFT2/dae2a1a6-a608-4b64-a77a-e4aed87e7d7f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/siqi00_Mistral-7B-DFT2/1762652580.5173602", - "retrieved_timestamp": "1762652580.517361", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "siqi00/Mistral-7B-DFT2", - "developer": "mistral", - "inference_platform": "unknown", - "id": "siqi00/Mistral-7B-DFT2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5803723010501026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39683798240076246 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44007291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523936170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/spmurrayzzz_Mistral-Syndicate-7B/80934f3c-8d0b-49be-9f42-e187b4729cff.json b/leaderboard_data/HFOpenLLMv2/mistral/spmurrayzzz_Mistral-Syndicate-7B/80934f3c-8d0b-49be-9f42-e187b4729cff.json deleted file mode 100644 index c7943b5b6406af7cac4e6d48d675b0ac68c8efc9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/spmurrayzzz_Mistral-Syndicate-7B/80934f3c-8d0b-49be-9f42-e187b4729cff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/spmurrayzzz_Mistral-Syndicate-7B/1762652580.534304", - "retrieved_timestamp": 
"1762652580.534305", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "spmurrayzzz/Mistral-Syndicate-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "spmurrayzzz/Mistral-Syndicate-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.249595517670891 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42450570755678535 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.033987915407854986 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43855208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2631316489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/teknium_CollectiveCognition-v1.1-Mistral-7B/626bfec9-65d1-4250-8d07-d9c8a008b554.json b/leaderboard_data/HFOpenLLMv2/mistral/teknium_CollectiveCognition-v1.1-Mistral-7B/626bfec9-65d1-4250-8d07-d9c8a008b554.json deleted file mode 100644 index 8716ed5bf06bab73a7238593d094aba209ba81f7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/teknium_CollectiveCognition-v1.1-Mistral-7B/626bfec9-65d1-4250-8d07-d9c8a008b554.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/teknium_CollectiveCognition-v1.1-Mistral-7B/1762652580.55394", - "retrieved_timestamp": "1762652580.553941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "teknium/CollectiveCognition-v1.1-Mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": 
"teknium/CollectiveCognition-v1.1-Mistral-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27904626391308396 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4493426704276236 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3869270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28366023936170215 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/teknium_OpenHermes-2-Mistral-7B/f24b2adb-f12d-4dd8-984b-8ab43e15720f.json b/leaderboard_data/HFOpenLLMv2/mistral/teknium_OpenHermes-2-Mistral-7B/f24b2adb-f12d-4dd8-984b-8ab43e15720f.json deleted file mode 100644 index 42fca7b973dd145687fdcbf68a799c439537d94c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/teknium_OpenHermes-2-Mistral-7B/f24b2adb-f12d-4dd8-984b-8ab43e15720f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-2-Mistral-7B/1762652580.5544581", - "retrieved_timestamp": "1762652580.5544589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "teknium/OpenHermes-2-Mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "teknium/OpenHermes-2-Mistral-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5286151854856226 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4947516371878204 - } - 
}, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45197916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2931349734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/teknium_OpenHermes-2.5-Mistral-7B/66d1a6cf-41da-4226-a06c-fc99641e754a.json b/leaderboard_data/HFOpenLLMv2/mistral/teknium_OpenHermes-2.5-Mistral-7B/66d1a6cf-41da-4226-a06c-fc99641e754a.json deleted file mode 100644 index c5a1ed2e27b57b4642dd06ddea13405d80af0250..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/teknium_OpenHermes-2.5-Mistral-7B/66d1a6cf-41da-4226-a06c-fc99641e754a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-2.5-Mistral-7B/1762652580.554678", - "retrieved_timestamp": "1762652580.5546792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "teknium/OpenHermes-2.5-Mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "teknium/OpenHermes-2.5-Mistral-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5571417173100706 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4870013259924984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4241979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3054355053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/tensopolis_mistral-small-2501-tensopolis-v1/53ec68aa-e4fc-430f-8ccf-f5886f1b9d4b.json b/leaderboard_data/HFOpenLLMv2/mistral/tensopolis_mistral-small-2501-tensopolis-v1/53ec68aa-e4fc-430f-8ccf-f5886f1b9d4b.json deleted file mode 100644 index 8755a9e8b00d2c423063fa8265790b4d2e2b150b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/tensopolis_mistral-small-2501-tensopolis-v1/53ec68aa-e4fc-430f-8ccf-f5886f1b9d4b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tensopolis_mistral-small-2501-tensopolis-v1/1762652580.555758", - "retrieved_timestamp": "1762652580.555758", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tensopolis/mistral-small-2501-tensopolis-v1", - "developer": "mistral", - "inference_platform": "unknown", - "id": "tensopolis/mistral-small-2501-tensopolis-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7762104549262623 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6474735931872574 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44410876132930516 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42797916666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4464760638297872 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/tensopolis_mistral-small-r1-tensopolis/b2ee17e1-3d66-4622-8ea9-3bf8747371a5.json b/leaderboard_data/HFOpenLLMv2/mistral/tensopolis_mistral-small-r1-tensopolis/b2ee17e1-3d66-4622-8ea9-3bf8747371a5.json deleted file mode 100644 index c4888e1ff0eaed1178e07769b783b3b42e9dbf68..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/tensopolis_mistral-small-r1-tensopolis/b2ee17e1-3d66-4622-8ea9-3bf8747371a5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tensopolis_mistral-small-r1-tensopolis/1762652580.556001", - "retrieved_timestamp": "1762652580.5560021", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tensopolis/mistral-small-r1-tensopolis", - "developer": "mistral", - "inference_platform": "unknown", - "id": "tensopolis/mistral-small-r1-tensopolis" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.462220242290456 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5435969591888976 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.290785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4035073138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/theprint_Conversely-Mistral-7B/5adde1ed-2d8f-4aa6-96f9-042df5358747.json b/leaderboard_data/HFOpenLLMv2/mistral/theprint_Conversely-Mistral-7B/5adde1ed-2d8f-4aa6-96f9-042df5358747.json deleted file mode 100644 index 6cdafee1685976459dd75c54c01b36f9e2903185..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/mistral/theprint_Conversely-Mistral-7B/5adde1ed-2d8f-4aa6-96f9-042df5358747.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_Conversely-Mistral-7B/1762652580.56185", - "retrieved_timestamp": "1762652580.5618508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/Conversely-Mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "theprint/Conversely-Mistral-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2608113139802391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4672348146697077 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4188958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28257978723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 14.496 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/tianyil1_MistralForCausalLM_Cal_DPO/9902ef50-5208-4053-bb90-e08c98211b3f.json b/leaderboard_data/HFOpenLLMv2/mistral/tianyil1_MistralForCausalLM_Cal_DPO/9902ef50-5208-4053-bb90-e08c98211b3f.json deleted file mode 100644 index fb8fb64ac5f07d090b8ae640c7756ca953b7874f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/tianyil1_MistralForCausalLM_Cal_DPO/9902ef50-5208-4053-bb90-e08c98211b3f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tianyil1_MistralForCausalLM_Cal_DPO/1762652580.566411", - "retrieved_timestamp": "1762652580.566412", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tianyil1/MistralForCausalLM_Cal_DPO", - "developer": "mistral", - "inference_platform": "unknown", - "id": "tianyil1/MistralForCausalLM_Cal_DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5327619604870633 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43814239617517153 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39765625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2763464095744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/uukuguy_speechless-code-mistral-7b-v1.0/cebdb6d6-a12c-47f6-b912-4b8e98763c48.json b/leaderboard_data/HFOpenLLMv2/mistral/uukuguy_speechless-code-mistral-7b-v1.0/cebdb6d6-a12c-47f6-b912-4b8e98763c48.json deleted file mode 100644 index 75eb4e6b28f1c2f3d255ba9e9a3801759dfbb318..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/uukuguy_speechless-code-mistral-7b-v1.0/cebdb6d6-a12c-47f6-b912-4b8e98763c48.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/uukuguy_speechless-code-mistral-7b-v1.0/1762652580.581523", - "retrieved_timestamp": "1762652580.581524", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "uukuguy/speechless-code-mistral-7b-v1.0", - "developer": "mistral", - "inference_platform": "unknown", - "id": "uukuguy/speechless-code-mistral-7b-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36652415590632853 - } - }, - { - "evaluation_name": 
"BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4571712887094195 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45017708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145777925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/vicgalle_Merge-Mistral-Prometheus-7B/ecfdb6a4-36d7-4252-9677-10655b3855e5.json b/leaderboard_data/HFOpenLLMv2/mistral/vicgalle_Merge-Mistral-Prometheus-7B/ecfdb6a4-36d7-4252-9677-10655b3855e5.json deleted file mode 100644 index dd973e82511964ff80b5c228636974c427c1729b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/vicgalle_Merge-Mistral-Prometheus-7B/ecfdb6a4-36d7-4252-9677-10655b3855e5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vicgalle_Merge-Mistral-Prometheus-7B/1762652580.5881548", - "retrieved_timestamp": "1762652580.5881548", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vicgalle/Merge-Mistral-Prometheus-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "vicgalle/Merge-Mistral-Prometheus-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48480143796238423 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.420139773821292 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { 
- "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2716921542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/xinchen9_Mistral-7B-CoT/6c54d5e2-7fca-4fa3-9d04-0f44d0651018.json b/leaderboard_data/HFOpenLLMv2/mistral/xinchen9_Mistral-7B-CoT/6c54d5e2-7fca-4fa3-9d04-0f44d0651018.json deleted file mode 100644 index 75b6cc60a97709fd3d42e8e6819d34c7e4861af2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/xinchen9_Mistral-7B-CoT/6c54d5e2-7fca-4fa3-9d04-0f44d0651018.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xinchen9_Mistral-7B-CoT/1762652580.5978932", - "retrieved_timestamp": "1762652580.597894", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xinchen9/Mistral-7B-CoT", - "developer": "mistral", - "inference_platform": "unknown", - "id": "xinchen9/Mistral-7B-CoT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2783470081605695 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38726762098069667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3994270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.2283909574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/yam-peleg_Hebrew-Mistral-7B-200K/4d45347d-4491-4d7b-9abe-02c42974f520.json b/leaderboard_data/HFOpenLLMv2/mistral/yam-peleg_Hebrew-Mistral-7B-200K/4d45347d-4491-4d7b-9abe-02c42974f520.json deleted file mode 100644 index 9e11ff27c0ae31cda96b93836476434e0f564479..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/yam-peleg_Hebrew-Mistral-7B-200K/4d45347d-4491-4d7b-9abe-02c42974f520.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Mistral-7B-200K/1762652580.6038961", - "retrieved_timestamp": "1762652580.603897", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yam-peleg/Hebrew-Mistral-7B-200K", - "developer": "mistral", - "inference_platform": "unknown", - "id": "yam-peleg/Hebrew-Mistral-7B-200K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17698041197356346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3410500846818921 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37399999999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2529089095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.504 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/yam-peleg_Hebrew-Mistral-7B-200K/83a71a32-796a-4fec-9513-2f4b5e032749.json b/leaderboard_data/HFOpenLLMv2/mistral/yam-peleg_Hebrew-Mistral-7B-200K/83a71a32-796a-4fec-9513-2f4b5e032749.json deleted file mode 100644 index 
faa496f2fb22de52604fe77e807d71f46d59a65f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/yam-peleg_Hebrew-Mistral-7B-200K/83a71a32-796a-4fec-9513-2f4b5e032749.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Mistral-7B-200K/1762652580.6036632", - "retrieved_timestamp": "1762652580.603664", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yam-peleg/Hebrew-Mistral-7B-200K", - "developer": "mistral", - "inference_platform": "unknown", - "id": "yam-peleg/Hebrew-Mistral-7B-200K" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1855731680829089 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4149272793394017 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3764791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25731382978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.504 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistral/yam-peleg_Hebrew-Mistral-7B/99c28dc3-f614-430a-99d7-31c2218c4d7f.json b/leaderboard_data/HFOpenLLMv2/mistral/yam-peleg_Hebrew-Mistral-7B/99c28dc3-f614-430a-99d7-31c2218c4d7f.json deleted file mode 100644 index daea51f5d4baab0cb27739a4d7c04e0d1e1fd120..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistral/yam-peleg_Hebrew-Mistral-7B/99c28dc3-f614-430a-99d7-31c2218c4d7f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Mistral-7B/1762652580.603384", - "retrieved_timestamp": "1762652580.603385", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yam-peleg/Hebrew-Mistral-7B", - "developer": "mistral", - "inference_platform": "unknown", - "id": "yam-peleg/Hebrew-Mistral-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23283443485507344 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43340366992362034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39765625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27800864361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.504 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Ministral-8B-Instruct-2410/d0cfd22e-6bad-4784-a172-76892d44f70b.json b/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Ministral-8B-Instruct-2410/d0cfd22e-6bad-4784-a172-76892d44f70b.json deleted file mode 100644 index 4d94a1b4fb7ab731d6d2c3ea8d99d52f91248206..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Ministral-8B-Instruct-2410/d0cfd22e-6bad-4784-a172-76892d44f70b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Ministral-8B-Instruct-2410/1762652580.361781", - "retrieved_timestamp": "1762652580.361782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Ministral-8B-Instruct-2410", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Ministral-8B-Instruct-2410" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.5896399331551394 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47616402016891385 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3291223404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.02 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-7B-Instruct-v0.1/ef779e6f-1c12-4237-aa45-e6315ed01d92.json b/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-7B-Instruct-v0.1/ef779e6f-1c12-4237-aa45-e6315ed01d92.json deleted file mode 100644 index 17dea97c69783fee8f4afcfa5619d1fe3e7b399f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-7B-Instruct-v0.1/ef779e6f-1c12-4237-aa45-e6315ed01d92.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-Instruct-v0.1/1762652580.3620229", - "retrieved_timestamp": "1762652580.3620229", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mistral-7B-Instruct-v0.1", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mistral-7B-Instruct-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4487060998151571 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33548084759810987 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38476041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24143949468085107 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-7B-Instruct-v0.2/fb55e940-f03d-4d79-9363-ec17eebf9596.json b/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-7B-Instruct-v0.2/fb55e940-f03d-4d79-9363-ec17eebf9596.json deleted file mode 100644 index a3e5ab858be2ec83e1cd3001d42d2e6f3528213c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-7B-Instruct-v0.2/fb55e940-f03d-4d79-9363-ec17eebf9596.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-Instruct-v0.2/1762652580.362234", - "retrieved_timestamp": "1762652580.3622348", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mistral-7B-Instruct-v0.2", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mistral-7B-Instruct-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5496227786717023 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44597355203292793 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39660416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2716921542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-7B-Instruct-v0.3/ddc775e5-a4cc-49bd-ace3-113f325134c0.json b/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-7B-Instruct-v0.3/ddc775e5-a4cc-49bd-ace3-113f325134c0.json deleted file mode 100644 index a14a4067487bd291217922a7af3ae423aa65907d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-7B-Instruct-v0.3/ddc775e5-a4cc-49bd-ace3-113f325134c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-7B-Instruct-v0.3/1762652580.362444", - "retrieved_timestamp": "1762652580.362445", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mistral-7B-Instruct-v0.3", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mistral-7B-Instruct-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5465254413844156 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47219631712648397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37390625000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30751329787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-Large-Instruct-2411/1f2c9c0c-7e71-4886-9980-300a7ae5c55e.json 
b/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-Large-Instruct-2411/1f2c9c0c-7e71-4886-9980-300a7ae5c55e.json deleted file mode 100644 index 7fe0e1393a373a980163f2fd121bad4e8febc096..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-Large-Instruct-2411/1f2c9c0c-7e71-4886-9980-300a7ae5c55e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Large-Instruct-2411/1762652580.3630579", - "retrieved_timestamp": "1762652580.363059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mistral-Large-Instruct-2411", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mistral-Large-Instruct-2411" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8400577135334246 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6746647735675069 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4954682779456193 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43708053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.454 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5561835106382979 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 122.61 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-Nemo-Instruct-2407/3758a033-b197-403b-ab9e-7457856f3ebc.json b/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-Nemo-Instruct-2407/3758a033-b197-403b-ab9e-7457856f3ebc.json deleted file mode 100644 index 8f03390874d54ef755ec2b251831fa28ac933def..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-Nemo-Instruct-2407/3758a033-b197-403b-ab9e-7457856f3ebc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Nemo-Instruct-2407/1762652580.363499", - "retrieved_timestamp": "1762652580.363499", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mistral-Nemo-Instruct-2407", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mistral-Nemo-Instruct-2407" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6380248850826917 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5036523950310812 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1268882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38999999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3517287234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-Small-Instruct-2409/15f66094-73f1-4302-adad-69522872682d.json b/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-Small-Instruct-2409/15f66094-73f1-4302-adad-69522872682d.json deleted file mode 100644 index 2dc05c6a2a1ba9994f140d2743a8947e0f013a9f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-Small-Instruct-2409/15f66094-73f1-4302-adad-69522872682d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Small-Instruct-2409/1762652580.363916", - "retrieved_timestamp": "1762652580.363917", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mistral-Small-Instruct-2409", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mistral-Small-Instruct-2409" - }, - "evaluation_results": [ 
- { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.666975846310013 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5213075098146217 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36320833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39602726063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.05 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-Small-Instruct-2409/a85d1dbd-465b-42c8-baf5-0e7a7ca00725.json b/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-Small-Instruct-2409/a85d1dbd-465b-42c8-baf5-0e7a7ca00725.json deleted file mode 100644 index af0d0073a9ebe89414aa7500f0c89e988000309e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mistral-Small-Instruct-2409/a85d1dbd-465b-42c8-baf5-0e7a7ca00725.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Mistral-Small-Instruct-2409/1762652580.364117", - "retrieved_timestamp": "1762652580.364118", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mistral-Small-Instruct-2409", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mistral-Small-Instruct-2409" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6282829558903709 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5830283846898211 - } - }, - { - "evaluation_name": "MATH 
Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4063333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.409906914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mixtral-8x22B-Instruct-v0.1/ee88881e-cdeb-4a55-b784-6b41b983d7aa.json b/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mixtral-8x22B-Instruct-v0.1/ee88881e-cdeb-4a55-b784-6b41b983d7aa.json deleted file mode 100644 index 94fa837fd94eea2761bd734497a37a0c496b7316..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mixtral-8x22B-Instruct-v0.1/ee88881e-cdeb-4a55-b784-6b41b983d7aa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x22B-Instruct-v0.1/1762652580.3642921", - "retrieved_timestamp": "1762652580.3642921", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mixtral-8x22B-Instruct-v0.1", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mixtral-8x22B-Instruct-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7183584001560305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6124924926272018 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18731117824773413 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43111458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44830452127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 140.621 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mixtral-8x7B-Instruct-v0.1/2e1de889-2df9-4c81-b5ce-c00c602704b7.json b/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mixtral-8x7B-Instruct-v0.1/2e1de889-2df9-4c81-b5ce-c00c602704b7.json deleted file mode 100644 index 12d85825e80ed088925a831e57a2328a3cd0083f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mistralai/mistralai_Mixtral-8x7B-Instruct-v0.1/2e1de889-2df9-4c81-b5ce-c00c602704b7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mistralai_Mixtral-8x7B-Instruct-v0.1/1762652580.364703", - "retrieved_timestamp": "1762652580.364704", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mistralai/Mixtral-8x7B-Instruct-v0.1", - "developer": "mistralai", - "inference_platform": "unknown", - "id": "mistralai/Mixtral-8x7B-Instruct-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5599143605633053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49623654013356494 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09138972809667674 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42032291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36918218085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mixtao/mixtao_MixTAO-7Bx2-MoE-v8.1/a6032673-fee4-4c8c-97fa-167729f495d6.json b/leaderboard_data/HFOpenLLMv2/mixtao/mixtao_MixTAO-7Bx2-MoE-v8.1/a6032673-fee4-4c8c-97fa-167729f495d6.json deleted file mode 100644 index 367285450162912f4164bcd130fc31005b0c02a8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mixtao/mixtao_MixTAO-7Bx2-MoE-v8.1/a6032673-fee4-4c8c-97fa-167729f495d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mixtao_MixTAO-7Bx2-MoE-v8.1/1762652580.3653471", - "retrieved_timestamp": "1762652580.365348", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mixtao/MixTAO-7Bx2-MoE-v8.1", - "developer": "mixtao", - "inference_platform": "unknown", - "id": "mixtao/MixTAO-7Bx2-MoE-v8.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41623337189767595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5189059391733521 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4463333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3123337765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mkxu/mkxu_llama-3-8b-instruct-fpo/0ba6add2-4495-4261-baab-224c0b6c683f.json b/leaderboard_data/HFOpenLLMv2/mkxu/mkxu_llama-3-8b-instruct-fpo/0ba6add2-4495-4261-baab-224c0b6c683f.json deleted file mode 100644 index e0004bc692818aefbbb1fd151cc64c848f80b8df..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mkxu/mkxu_llama-3-8b-instruct-fpo/0ba6add2-4495-4261-baab-224c0b6c683f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/mkxu_llama-3-8b-instruct-fpo/1762652580.366677", - "retrieved_timestamp": "1762652580.366678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mkxu/llama-3-8b-instruct-fpo", - "developer": "mkxu", - "inference_platform": "unknown", - "id": "mkxu/llama-3-8b-instruct-fpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6790161216682846 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4959114413700331 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36578125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36045545212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_AlphaMonarch-7B/d7eb4408-6857-4df1-b92b-9dd4712a4f23.json b/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_AlphaMonarch-7B/d7eb4408-6857-4df1-b92b-9dd4712a4f23.json deleted file mode 100644 index 5c03aaf240c31b62c7aaf5e5728dbe39b1d088bc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_AlphaMonarch-7B/d7eb4408-6857-4df1-b92b-9dd4712a4f23.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlabonne_AlphaMonarch-7B/1762652580.367184", - "retrieved_timestamp": "1762652580.3671849", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlabonne/AlphaMonarch-7B", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/AlphaMonarch-7B" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49394384677101205 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4625522037183211 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41213541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24725731382978725 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_Beyonder-4x7B-v3/b0867447-6dd9-453c-af09-da0db5651e65.json b/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_Beyonder-4x7B-v3/b0867447-6dd9-453c-af09-da0db5651e65.json deleted file mode 100644 index 1ebe138561d28ed4d8dd696469f900b63564e331..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_Beyonder-4x7B-v3/b0867447-6dd9-453c-af09-da0db5651e65.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlabonne_Beyonder-4x7B-v3/1762652580.36743", - "retrieved_timestamp": "1762652580.367431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlabonne/Beyonder-4x7B-v3", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/Beyonder-4x7B-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5608385749810503 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4670522037183211 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28523489932885904 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40454166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2512466755319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_BigQwen2.5-52B-Instruct/b18517f1-db51-43a8-812f-75aeccae508f.json b/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_BigQwen2.5-52B-Instruct/b18517f1-db51-43a8-812f-75aeccae508f.json deleted file mode 100644 index 2c05f364ff8463b4591050dfa1e790de3df14134..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_BigQwen2.5-52B-Instruct/b18517f1-db51-43a8-812f-75aeccae508f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlabonne_BigQwen2.5-52B-Instruct/1762652580.3676438", - "retrieved_timestamp": "1762652580.367645", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlabonne/BigQwen2.5-52B-Instruct", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/BigQwen2.5-52B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7913480675718205 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7121004678698547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.547583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41130208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5519448138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 52.268 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_BigQwen2.5-Echo-47B-Instruct/12efcd4e-13cc-46e5-964a-35d4be69a01e.json b/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_BigQwen2.5-Echo-47B-Instruct/12efcd4e-13cc-46e5-964a-35d4be69a01e.json deleted file mode 100644 index cf8eee2696a5e9c2e86f7155fd1f883100465ead..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_BigQwen2.5-Echo-47B-Instruct/12efcd4e-13cc-46e5-964a-35d4be69a01e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlabonne_BigQwen2.5-Echo-47B-Instruct/1762652580.36785", - "retrieved_timestamp": "1762652580.36785", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlabonne/BigQwen2.5-Echo-47B-Instruct", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/BigQwen2.5-Echo-47B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7356691356711305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6125111878044905 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4124791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4734042553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 47.392 - } -} \ No 
newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_Daredevil-8B-abliterated/3ad89b65-5719-4e54-aadf-c10d3f27857a.json b/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_Daredevil-8B-abliterated/3ad89b65-5719-4e54-aadf-c10d3f27857a.json deleted file mode 100644 index a20078ae0e62084cd3706ede4129e81f20ce9b74..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_Daredevil-8B-abliterated/3ad89b65-5719-4e54-aadf-c10d3f27857a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlabonne_Daredevil-8B-abliterated/1762652580.3686998", - "retrieved_timestamp": "1762652580.3686998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlabonne/Daredevil-8B-abliterated", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/Daredevil-8B-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44263664853699297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4254272523147253 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40702083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3700964095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_Daredevil-8B/4653087e-b528-47c1-86eb-0166538229bc.json b/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_Daredevil-8B/4653087e-b528-47c1-86eb-0166538229bc.json deleted file mode 100644 index f299341cdfc491fdde2426eae685b7884ebfa425..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_Daredevil-8B/4653087e-b528-47c1-86eb-0166538229bc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/mlabonne_Daredevil-8B/1762652580.368499", - "retrieved_timestamp": "1762652580.3685", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlabonne/Daredevil-8B", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/Daredevil-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45477665926408595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5194408746721715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.393875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.383061835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_Meta-Llama-3.1-8B-Instruct-abliterated/605f3f59-204e-4332-8b4e-9da04871ca1b.json b/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_Meta-Llama-3.1-8B-Instruct-abliterated/605f3f59-204e-4332-8b4e-9da04871ca1b.json deleted file mode 100644 index 22e1c8b4705c7c9f9ab7cc9a00d5d50a2bf98cf4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_Meta-Llama-3.1-8B-Instruct-abliterated/605f3f59-204e-4332-8b4e-9da04871ca1b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlabonne_Meta-Llama-3.1-8B-Instruct-abliterated/1762652580.369122", - "retrieved_timestamp": "1762652580.369123", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", - "developer": "mlabonne", - 
"inference_platform": "unknown", - "id": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7329463601023063 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48740648734902187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36488541666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3503158244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_NeuralBeagle14-7B/0bfec228-5bfb-4662-8be5-ad910b5bc3bd.json b/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_NeuralBeagle14-7B/0bfec228-5bfb-4662-8be5-ad910b5bc3bd.json deleted file mode 100644 index 70bbaabdae6fceac4e278ea691ed91f3c728ce91..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_NeuralBeagle14-7B/0bfec228-5bfb-4662-8be5-ad910b5bc3bd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlabonne_NeuralBeagle14-7B/1762652580.369343", - "retrieved_timestamp": "1762652580.369343", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlabonne/NeuralBeagle14-7B", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/NeuralBeagle14-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49351941736813876 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.46278709452353844 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05211480362537765 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43194791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2601396276595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_NeuralDaredevil-8B-abliterated/05fe5948-c228-46f5-ac96-3c234bc5b3ce.json b/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_NeuralDaredevil-8B-abliterated/05fe5948-c228-46f5-ac96-3c234bc5b3ce.json deleted file mode 100644 index 617dc51cc35b3a363f46052e9970d76569db03a4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_NeuralDaredevil-8B-abliterated/05fe5948-c228-46f5-ac96-3c234bc5b3ce.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlabonne_NeuralDaredevil-8B-abliterated/1762652580.369559", - "retrieved_timestamp": "1762652580.36956", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlabonne/NeuralDaredevil-8B-abliterated", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/NeuralDaredevil-8B-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.756077208473517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5110566504436299 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09063444108761329 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4019375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38414228723404253 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_NeuralDaredevil-8B-abliterated/d4b40160-579a-4e66-96a2-8441e5c02694.json b/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_NeuralDaredevil-8B-abliterated/d4b40160-579a-4e66-96a2-8441e5c02694.json deleted file mode 100644 index fc07e360013a8e551a0acaa16af03906365f3c25..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mlabonne/mlabonne_NeuralDaredevil-8B-abliterated/d4b40160-579a-4e66-96a2-8441e5c02694.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlabonne_NeuralDaredevil-8B-abliterated/1762652580.369774", - "retrieved_timestamp": "1762652580.369775", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlabonne/NeuralDaredevil-8B-abliterated", - "developer": "mlabonne", - "inference_platform": "unknown", - "id": "mlabonne/NeuralDaredevil-8B-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41623337189767595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5123964057729099 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4149583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3801529255319149 - } - } - 
], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mlx-community/mlx-community_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/9bf2a7e3-e744-4ac0-853a-f5cec8ef9c57.json b/leaderboard_data/HFOpenLLMv2/mlx-community/mlx-community_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/9bf2a7e3-e744-4ac0-853a-f5cec8ef9c57.json deleted file mode 100644 index 4ea828776aa0cc39d237c705ba475608b0feb737..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mlx-community/mlx-community_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/9bf2a7e3-e744-4ac0-853a-f5cec8ef9c57.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlx-community_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32/1762652580.3704169", - "retrieved_timestamp": "1762652580.3704178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32", - "developer": "mlx-community", - "inference_platform": "unknown", - "id": "mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3368983186833158 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32921013057720044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3249166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16381316489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mlx-community/mlx-community_Mistral-Small-24B-Instruct-2501-bf16/d769592a-faa3-4269-abac-373679f42c62.json 
b/leaderboard_data/HFOpenLLMv2/mlx-community/mlx-community_Mistral-Small-24B-Instruct-2501-bf16/d769592a-faa3-4269-abac-373679f42c62.json deleted file mode 100644 index f73fe4f49e6501c4222f86f96405975a46b140c3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mlx-community/mlx-community_Mistral-Small-24B-Instruct-2501-bf16/d769592a-faa3-4269-abac-373679f42c62.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mlx-community_Mistral-Small-24B-Instruct-2501-bf16/1762652580.3707452", - "retrieved_timestamp": "1762652580.3707461", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mlx-community/Mistral-Small-24B-Instruct-2501-bf16", - "developer": "mlx-community", - "inference_platform": "unknown", - "id": "mlx-community/Mistral-Small-24B-Instruct-2501-bf16" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6282829558903709 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6713272911918485 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4618333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5394780585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 23.572 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/moeru-ai/moeru-ai_L3.1-Moe-2x8B-v0.2/cf47622f-c921-4610-adef-bed2a4670249.json b/leaderboard_data/HFOpenLLMv2/moeru-ai/moeru-ai_L3.1-Moe-2x8B-v0.2/cf47622f-c921-4610-adef-bed2a4670249.json deleted file mode 100644 index d49c92766dba26d39f05bd793f83635bd92b6d15..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/moeru-ai/moeru-ai_L3.1-Moe-2x8B-v0.2/cf47622f-c921-4610-adef-bed2a4670249.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/moeru-ai_L3.1-Moe-2x8B-v0.2/1762652580.371698", - 
"retrieved_timestamp": "1762652580.3716989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "moeru-ai/L3.1-Moe-2x8B-v0.2", - "developer": "moeru-ai", - "inference_platform": "unknown", - "id": "moeru-ai/L3.1-Moe-2x8B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7347947889377962 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5255688392585965 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16993957703927492 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41985416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38580452127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 13.668 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/moeru-ai/moeru-ai_L3.1-Moe-4x8B-v0.1/bbcae028-046e-4e87-b991-5d7b92c42cc2.json b/leaderboard_data/HFOpenLLMv2/moeru-ai/moeru-ai_L3.1-Moe-4x8B-v0.1/bbcae028-046e-4e87-b991-5d7b92c42cc2.json deleted file mode 100644 index 3595645edd020d213f95911e3916fa72cf3db271..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/moeru-ai/moeru-ai_L3.1-Moe-4x8B-v0.1/bbcae028-046e-4e87-b991-5d7b92c42cc2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/moeru-ai_L3.1-Moe-4x8B-v0.1/1762652580.371937", - "retrieved_timestamp": "1762652580.371938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "moeru-ai/L3.1-Moe-4x8B-v0.1", - "developer": "moeru-ai", - "inference_platform": "unknown", - "id": "moeru-ai/L3.1-Moe-4x8B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433219413378724 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49392781736367014 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3609166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34541223404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.942 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/moeru-ai/moeru-ai_L3.1-Moe-4x8B-v0.2/e6fe5591-f6aa-40c6-897f-f90084682109.json b/leaderboard_data/HFOpenLLMv2/moeru-ai/moeru-ai_L3.1-Moe-4x8B-v0.2/e6fe5591-f6aa-40c6-897f-f90084682109.json deleted file mode 100644 index 6346742cd86829a79a7cd13c4007afe1e7286886..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/moeru-ai/moeru-ai_L3.1-Moe-4x8B-v0.2/e6fe5591-f6aa-40c6-897f-f90084682109.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/moeru-ai_L3.1-Moe-4x8B-v0.2/1762652580.372139", - "retrieved_timestamp": "1762652580.37214", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "moeru-ai/L3.1-Moe-4x8B-v0.2", - "developer": "moeru-ai", - "inference_platform": "unknown", - "id": "moeru-ai/L3.1-Moe-4x8B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5406554608438943 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.446625675582615 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3233958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27626329787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.942 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/monsterapi/monsterapi_Llama-3_1-8B-Instruct-orca-ORPO/b70a3980-7b0b-4bb1-878f-c2d49f9df09e.json b/leaderboard_data/HFOpenLLMv2/monsterapi/monsterapi_Llama-3_1-8B-Instruct-orca-ORPO/b70a3980-7b0b-4bb1-878f-c2d49f9df09e.json deleted file mode 100644 index 0a138e1391ea1d40c00cc7aa58e17fc2df962d7e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/monsterapi/monsterapi_Llama-3_1-8B-Instruct-orca-ORPO/b70a3980-7b0b-4bb1-878f-c2d49f9df09e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/monsterapi_Llama-3_1-8B-Instruct-orca-ORPO/1762652580.3723478", - "retrieved_timestamp": "1762652580.3723478", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "monsterapi/Llama-3_1-8B-Instruct-orca-ORPO", - "developer": "monsterapi", - "inference_platform": "unknown", - "id": "monsterapi/Llama-3_1-8B-Instruct-orca-ORPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22728914834860392 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28653625778742803 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34447916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11677194148936171 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 16.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mosaicml/mosaicml_mpt-7b/5e55c7ee-90f6-40a4-83ca-4a3acdad40f2.json b/leaderboard_data/HFOpenLLMv2/mosaicml/mosaicml_mpt-7b/5e55c7ee-90f6-40a4-83ca-4a3acdad40f2.json deleted file mode 100644 index 785d01b371f2dafffef314cd8e03259b7d1e5f79..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mosaicml/mosaicml_mpt-7b/5e55c7ee-90f6-40a4-83ca-4a3acdad40f2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mosaicml_mpt-7b/1762652580.3728561", - "retrieved_timestamp": "1762652580.372857", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mosaicml/mpt-7b", - "developer": "mosaicml", - "inference_platform": "unknown", - "id": "mosaicml/mpt-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21519900530592162 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32997415960801324 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36723958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12059507978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MPTForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/mosama/mosama_Qwen2.5-1.5B-Instruct-CoT-Reflection/e0d9dbcc-8df2-4207-b849-2c4984340605.json b/leaderboard_data/HFOpenLLMv2/mosama/mosama_Qwen2.5-1.5B-Instruct-CoT-Reflection/e0d9dbcc-8df2-4207-b849-2c4984340605.json deleted file mode 100644 index 8356faa84ed38f43f3ea173ca69ccd62e6554670..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mosama/mosama_Qwen2.5-1.5B-Instruct-CoT-Reflection/e0d9dbcc-8df2-4207-b849-2c4984340605.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mosama_Qwen2.5-1.5B-Instruct-CoT-Reflection/1762652580.373101", - "retrieved_timestamp": "1762652580.3731022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection", - "developer": "mosama", - "inference_platform": "unknown", - "id": "mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2870394996387363 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41093712633583523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3211979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26512632978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mrdayl/mrdayl_OpenCogito/aacaba19-8c17-4d20-b27b-672810272ed4.json b/leaderboard_data/HFOpenLLMv2/mrdayl/mrdayl_OpenCogito/aacaba19-8c17-4d20-b27b-672810272ed4.json deleted file mode 100644 index 4b39b30bd034baa30acd13d3374c41ce6c770649..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mrdayl/mrdayl_OpenCogito/aacaba19-8c17-4d20-b27b-672810272ed4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/mrdayl_OpenCogito/1762652580.373355", - "retrieved_timestamp": "1762652580.373356", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mrdayl/OpenCogito", - "developer": "mrdayl", - "inference_platform": "unknown", - "id": "mrdayl/OpenCogito" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3933773498761065 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47196973414577464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42401041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3451628989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mrdayl/mrdayl_OpenCognito-r1/91e89f4c-d05b-476a-a8d9-0186ef8d1418.json b/leaderboard_data/HFOpenLLMv2/mrdayl/mrdayl_OpenCognito-r1/91e89f4c-d05b-476a-a8d9-0186ef8d1418.json deleted file mode 100644 index 75516e4cbf869c97990f280b586bc1e3a016c16b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mrdayl/mrdayl_OpenCognito-r1/91e89f4c-d05b-476a-a8d9-0186ef8d1418.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mrdayl_OpenCognito-r1/1762652580.3737972", - "retrieved_timestamp": "1762652580.373798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mrdayl/OpenCognito-r1", - "developer": "mrdayl", - "inference_platform": "unknown", - "id": "mrdayl/OpenCognito-r1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42412687225450696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4673346036303057 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1903323262839879 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42407291666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474900265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mrdayl/mrdayl_OpenCognito-r2/672c6991-3c7b-48c3-9e95-389175e7cd6b.json b/leaderboard_data/HFOpenLLMv2/mrdayl/mrdayl_OpenCognito-r2/672c6991-3c7b-48c3-9e95-389175e7cd6b.json deleted file mode 100644 index 7d9968dfd70038636e137ff4686bd4731ec83305..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mrdayl/mrdayl_OpenCognito-r2/672c6991-3c7b-48c3-9e95-389175e7cd6b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mrdayl_OpenCognito-r2/1762652580.373997", - "retrieved_timestamp": "1762652580.3739982", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mrdayl/OpenCognito-r2", - "developer": "mrdayl", - "inference_platform": "unknown", - "id": "mrdayl/OpenCognito-r2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3958751667797001 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46882818163435913 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42016666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34616023936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mrdayl/mrdayl_OpenCognito/049eb195-7ca8-42a7-bf2a-e072b7929958.json b/leaderboard_data/HFOpenLLMv2/mrdayl/mrdayl_OpenCognito/049eb195-7ca8-42a7-bf2a-e072b7929958.json deleted file mode 100644 index f9199d0a4c96d4571767a1972778228f72ea7d98..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mrdayl/mrdayl_OpenCognito/049eb195-7ca8-42a7-bf2a-e072b7929958.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mrdayl_OpenCognito/1762652580.373594", - "retrieved_timestamp": "1762652580.373594", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mrdayl/OpenCognito", - "developer": "mrdayl", - "inference_platform": "unknown", - "id": "mrdayl/OpenCognito" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40621661635571393 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4705607805549634 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21148036253776434 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42934374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3443317819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/mrdayl/mrdayl_OpenThink/ae71ec28-7e22-42c4-8549-4334dff8a811.json b/leaderboard_data/HFOpenLLMv2/mrdayl/mrdayl_OpenThink/ae71ec28-7e22-42c4-8549-4334dff8a811.json deleted file mode 100644 index e565eb932ccf5caaf93e97e86b5e15a9c1e97373..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/mrdayl/mrdayl_OpenThink/ae71ec28-7e22-42c4-8549-4334dff8a811.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/mrdayl_OpenThink/1762652580.374203", - "retrieved_timestamp": "1762652580.374204", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "mrdayl/OpenThink", - "developer": "mrdayl", - "inference_platform": "unknown", - "id": "mrdayl/OpenThink" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20540720842919008 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34597850879756104 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28851963746223563 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32888541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18500664893617022 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/natong19/natong19_Mistral-Nemo-Instruct-2407-abliterated/5256f7b6-f830-4733-a092-01470607558d.json b/leaderboard_data/HFOpenLLMv2/natong19/natong19_Mistral-Nemo-Instruct-2407-abliterated/5256f7b6-f830-4733-a092-01470607558d.json deleted file mode 100644 index 
ab416a61e4711c07e719e7de7cb784037f3bc121..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/natong19/natong19_Mistral-Nemo-Instruct-2407-abliterated/5256f7b6-f830-4733-a092-01470607558d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/natong19_Mistral-Nemo-Instruct-2407-abliterated/1762652580.375077", - "retrieved_timestamp": "1762652580.375078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "natong19/Mistral-Nemo-Instruct-2407-abliterated", - "developer": "natong19", - "inference_platform": "unknown", - "id": "natong19/Mistral-Nemo-Instruct-2407-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6392239258500778 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5048447739625885 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4033333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.351811835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/natong19/natong19_Qwen2-7B-Instruct-abliterated/7c8605a5-2f0d-4cc7-b840-d77cb5fdf849.json b/leaderboard_data/HFOpenLLMv2/natong19/natong19_Qwen2-7B-Instruct-abliterated/7c8605a5-2f0d-4cc7-b840-d77cb5fdf849.json deleted file mode 100644 index 07e5087ae7933f125337c5659f212868ef47d3e4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/natong19/natong19_Qwen2-7B-Instruct-abliterated/7c8605a5-2f0d-4cc7-b840-d77cb5fdf849.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/natong19_Qwen2-7B-Instruct-abliterated/1762652580.375325", - "retrieved_timestamp": "1762652580.375325", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "natong19/Qwen2-7B-Instruct-abliterated", - "developer": "natong19", - "inference_platform": "unknown", - "id": "natong19/Qwen2-7B-Instruct-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5836945970026197 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5553035842403061 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4034270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3842253989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Kartoffel-Deepfry-12B/09ba1be1-4b42-4eba-810f-a0aed64aafc0.json b/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Kartoffel-Deepfry-12B/09ba1be1-4b42-4eba-810f-a0aed64aafc0.json deleted file mode 100644 index d5ab932e3c7ccbbc9887ae8aa1fdf28f634ad911..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Kartoffel-Deepfry-12B/09ba1be1-4b42-4eba-810f-a0aed64aafc0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Kartoffel-Deepfry-12B/1762652580.379381", - "retrieved_timestamp": "1762652580.3793821", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Kartoffel-Deepfry-12B", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": "nbeerbower/Kartoffel-Deepfry-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5021620411618949 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5365374219062301 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4791666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3582114361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Lyra4-Gutenberg-12B/02606fe0-ca08-4102-9670-8a18a9cc6f81.json b/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Lyra4-Gutenberg-12B/02606fe0-ca08-4102-9670-8a18a9cc6f81.json deleted file mode 100644 index ca3088c8eb193cc998e3332dc8387370a06bf55a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Lyra4-Gutenberg-12B/02606fe0-ca08-4102-9670-8a18a9cc6f81.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Lyra4-Gutenberg-12B/1762652580.380318", - "retrieved_timestamp": "1762652580.380318", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Lyra4-Gutenberg-12B", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": "nbeerbower/Lyra4-Gutenberg-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212185888996751 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.538669487933139 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on 
MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4037916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35713098404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Lyra4-Gutenberg2-12B/f9da5237-3903-4bbf-a0bc-0bcf3152f45a.json b/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Lyra4-Gutenberg2-12B/f9da5237-3903-4bbf-a0bc-0bcf3152f45a.json deleted file mode 100644 index 62629fe00600225a6f212a887a80c610399fca85..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Lyra4-Gutenberg2-12B/f9da5237-3903-4bbf-a0bc-0bcf3152f45a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Lyra4-Gutenberg2-12B/1762652580.380519", - "retrieved_timestamp": "1762652580.3805199", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Lyra4-Gutenberg2-12B", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": "nbeerbower/Lyra4-Gutenberg2-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25851296781428834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5344527944750038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11706948640483383 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39721874999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35654920212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Mistral-Nemo-Moderne-12B-FFT-experimental/e7337143-6ec7-4467-b6f5-907492705cc9.json b/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Mistral-Nemo-Moderne-12B-FFT-experimental/e7337143-6ec7-4467-b6f5-907492705cc9.json deleted file mode 100644 index 2637255380bd17ae0c6bb917e846e0c816e48b38..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Mistral-Nemo-Moderne-12B-FFT-experimental/e7337143-6ec7-4467-b6f5-907492705cc9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Mistral-Nemo-Moderne-12B-FFT-experimental/1762652580.3819818", - "retrieved_timestamp": "1762652580.381983", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": "nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33522498082864577 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5234089179237257 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3714895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3454953457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Nemo-Loony-12B-experimental/894b90c6-c701-47d8-b930-4e271e28962f.json b/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Nemo-Loony-12B-experimental/894b90c6-c701-47d8-b930-4e271e28962f.json deleted file mode 100644 index 5e709cfeba9721e97c6bb6fffb35872610dd1869..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Nemo-Loony-12B-experimental/894b90c6-c701-47d8-b930-4e271e28962f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Nemo-Loony-12B-experimental/1762652580.383332", - "retrieved_timestamp": "1762652580.383332", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Nemo-Loony-12B-experimental", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": "nbeerbower/Nemo-Loony-12B-experimental" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37344357416100393 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38222228797769536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3340625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1589095744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Nemoties-ChatML-12B/3644fc16-b0fa-42d7-b17a-eb8f8332193f.json b/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Nemoties-ChatML-12B/3644fc16-b0fa-42d7-b17a-eb8f8332193f.json deleted file mode 100644 index 21ab08a7d74c802d01072313bb15181a85a52c36..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_Nemoties-ChatML-12B/3644fc16-b0fa-42d7-b17a-eb8f8332193f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_Nemoties-ChatML-12B/1762652580.383542", - "retrieved_timestamp": "1762652580.383543", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/Nemoties-ChatML-12B", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": "nbeerbower/Nemoties-ChatML-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6381999760635115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5470252374810588 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45086458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3550531914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_SmolNemo-12B-FFT-experimental/435e3ce7-479f-4624-978e-25d755dee811.json b/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_SmolNemo-12B-FFT-experimental/435e3ce7-479f-4624-978e-25d755dee811.json deleted file mode 100644 index 386c6543c57b859f551992bd68c2ce7450734e36..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_SmolNemo-12B-FFT-experimental/435e3ce7-479f-4624-978e-25d755dee811.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_SmolNemo-12B-FFT-experimental/1762652580.383975", - "retrieved_timestamp": "1762652580.383976", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/SmolNemo-12B-FFT-experimental", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": "nbeerbower/SmolNemo-12B-FFT-experimental" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3348005514257725 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3336088810494464 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38469791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12167553191489362 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_mistral-nemo-wissenschaft-12B/5f68a07f-4442-4453-92c3-b615323da96b.json b/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_mistral-nemo-wissenschaft-12B/5f68a07f-4442-4453-92c3-b615323da96b.json deleted file mode 100644 index e9b1607543d140a4b7afe8da832c4983f394f6e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nbeerbower/nbeerbower_mistral-nemo-wissenschaft-12B/5f68a07f-4442-4453-92c3-b615323da96b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbeerbower_mistral-nemo-wissenschaft-12B/1762652580.388424", - "retrieved_timestamp": "1762652580.388424", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbeerbower/mistral-nemo-wissenschaft-12B", - "developer": "nbeerbower", - "inference_platform": "unknown", - "id": "nbeerbower/mistral-nemo-wissenschaft-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.6520133246452745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5040306120993181 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41778125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35322473404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nbrahme/nbrahme_IndusQ/b372e098-0e1c-410a-8f5a-1bd9a910aa6b.json b/leaderboard_data/HFOpenLLMv2/nbrahme/nbrahme_IndusQ/b372e098-0e1c-410a-8f5a-1bd9a910aa6b.json deleted file mode 100644 index ef4ef18655aa26cd2e664ff00b2fc1ba8753ab5c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nbrahme/nbrahme_IndusQ/b372e098-0e1c-410a-8f5a-1bd9a910aa6b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nbrahme_IndusQ/1762652580.38863", - "retrieved_timestamp": "1762652580.388631", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nbrahme/IndusQ", - "developer": "nbrahme", - "inference_platform": "unknown", - "id": "nbrahme/IndusQ" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24397487555242311 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30624035198474986 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26510067114093966 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3366354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11203457446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 1.176 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/necva/necva_replica-IEPile/86a45185-8753-4cd0-818f-63a62f03423f.json b/leaderboard_data/HFOpenLLMv2/necva/necva_replica-IEPile/86a45185-8753-4cd0-818f-63a62f03423f.json deleted file mode 100644 index 2bc5aa9215111677ac6070794fdde683e6f0dbc6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/necva/necva_replica-IEPile/86a45185-8753-4cd0-818f-63a62f03423f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/necva_replica-IEPile/1762652580.389119", - "retrieved_timestamp": "1762652580.38912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "necva/replica-IEPile", - "developer": "necva", - "inference_platform": "unknown", - "id": "necva/replica-IEPile" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4677910167245132 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4778579652970231 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12386706948640483 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3997604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3560505319148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 4.65 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.1-bf16-falcon3-7b-instruct/5063eae6-e8f3-41c6-ab11-cfcc4a0a0cf3.json b/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.1-bf16-falcon3-7b-instruct/5063eae6-e8f3-41c6-ab11-cfcc4a0a0cf3.json deleted file mode 100644 index 5f6667883ae8b5c8d05a363affd7f564872a8ed4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.1-bf16-falcon3-7b-instruct/5063eae6-e8f3-41c6-ab11-cfcc4a0a0cf3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.1-bf16-falcon3-7b-instruct/1762652580.389358", - "retrieved_timestamp": "1762652580.389359", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "neopolita/jessi-v0.1-bf16-falcon3-7b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.1-bf16-falcon3-7b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7527050448365891 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5516128933222162 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3806646525679758 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48248958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3923703457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.1-falcon3-10b-instruct/c2ee0925-6e4a-4d3b-80be-b8b98156e3db.json b/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.1-falcon3-10b-instruct/c2ee0925-6e4a-4d3b-80be-b8b98156e3db.json deleted file mode 100644 index 
3dd133767b6b897c5c530b8683fcdeee42fb6910..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.1-falcon3-10b-instruct/c2ee0925-6e4a-4d3b-80be-b8b98156e3db.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.1-falcon3-10b-instruct/1762652580.389616", - "retrieved_timestamp": "1762652580.389617", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "neopolita/jessi-v0.1-falcon3-10b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.1-falcon3-10b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.755152994055772 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5952883626256132 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2001510574018127 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3187919463087248 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42785416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187998670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.1-qwen2.5-7b-instruct/9b1f077d-5893-417c-ac87-1d0beb39b750.json b/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.1-qwen2.5-7b-instruct/9b1f077d-5893-417c-ac87-1d0beb39b750.json deleted file mode 100644 index f2e67b0c2292d2002ee784755416c951a35ceb22..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.1-qwen2.5-7b-instruct/9b1f077d-5893-417c-ac87-1d0beb39b750.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.1-qwen2.5-7b-instruct/1762652580.3898308", - "retrieved_timestamp": "1762652580.3898308", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "neopolita/jessi-v0.1-qwen2.5-7b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.1-qwen2.5-7b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7326715337526651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5292315105257686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4086102719033233 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3913645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42278922872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.1-virtuoso-small/b4630d14-950d-4dbf-8897-74d46dd51130.json b/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.1-virtuoso-small/b4630d14-950d-4dbf-8897-74d46dd51130.json deleted file mode 100644 index 3fa3b5f98683a77c1b691fd49ff48da71f45c21c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.1-virtuoso-small/b4630d14-950d-4dbf-8897-74d46dd51130.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.1-virtuoso-small/1762652580.3900428", - "retrieved_timestamp": "1762652580.3900428", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "neopolita/jessi-v0.1-virtuoso-small", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.1-virtuoso-small" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7959192719761344 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6442861439957068 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43616666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5129654255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.2-falcon3-10b-instruct/4a73436e-e2b7-4c03-b4b2-80d0ed8e389a.json b/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.2-falcon3-10b-instruct/4a73436e-e2b7-4c03-b4b2-80d0ed8e389a.json deleted file mode 100644 index 5b870a8b9ddae346508f05b0c5708ada6de3b1f5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.2-falcon3-10b-instruct/4a73436e-e2b7-4c03-b4b2-80d0ed8e389a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.2-falcon3-10b-instruct/1762652580.390252", - "retrieved_timestamp": "1762652580.390252", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "neopolita/jessi-v0.2-falcon3-10b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.2-falcon3-10b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7768099753099553 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6204846671314362 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2122356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42813541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4354222074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.2-falcon3-7b-instruct/bd8025f1-66d4-4644-af1b-ca5366a32964.json b/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.2-falcon3-7b-instruct/bd8025f1-66d4-4644-af1b-ca5366a32964.json deleted file mode 100644 index 33837f59384f685387a48b03547b6dd3c81252c4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.2-falcon3-7b-instruct/bd8025f1-66d4-4644-af1b-ca5366a32964.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.2-falcon3-7b-instruct/1762652580.39046", - "retrieved_timestamp": "1762652580.39046", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "neopolita/jessi-v0.2-falcon3-7b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.2-falcon3-7b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5770754930251731 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5363079188886575 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2537764350453172 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44788541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3904587765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.3-falcon3-7b-instruct/95281cbf-6f27-4e17-b21f-9a0604d5629b.json b/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.3-falcon3-7b-instruct/95281cbf-6f27-4e17-b21f-9a0604d5629b.json deleted file mode 100644 index eed5dd24008d7f9e83adbff2b3134a31fc7e9144..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.3-falcon3-7b-instruct/95281cbf-6f27-4e17-b21f-9a0604d5629b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.3-falcon3-7b-instruct/1762652580.390663", - "retrieved_timestamp": "1762652580.3906639", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "neopolita/jessi-v0.3-falcon3-7b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.3-falcon3-7b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7509064836855099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.538793502664194 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46915625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3970246010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 
- } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.4-falcon3-7b-instruct/514b1b8c-d80a-4851-afec-e04968b2e733.json b/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.4-falcon3-7b-instruct/514b1b8c-d80a-4851-afec-e04968b2e733.json deleted file mode 100644 index 0813ed116ce21c5131044575dee67f936b5d1fe8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.4-falcon3-7b-instruct/514b1b8c-d80a-4851-afec-e04968b2e733.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.4-falcon3-7b-instruct/1762652580.39086", - "retrieved_timestamp": "1762652580.390861", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "neopolita/jessi-v0.4-falcon3-7b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.4-falcon3-7b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7603735865281896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5521668757306609 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3768882175226586 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49712500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40043218085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.5-falcon3-7b-instruct/6736897b-390a-4c19-8a04-9b606c1705b1.json b/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.5-falcon3-7b-instruct/6736897b-390a-4c19-8a04-9b606c1705b1.json deleted file mode 100644 index 1bea57d256166bea613d3b3410d234fdffb63100..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.5-falcon3-7b-instruct/6736897b-390a-4c19-8a04-9b606c1705b1.json 
+++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.5-falcon3-7b-instruct/1762652580.391073", - "retrieved_timestamp": "1762652580.391074", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "neopolita/jessi-v0.5-falcon3-7b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.5-falcon3-7b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7411645544931892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5589627302276082 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37386706948640486 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48652083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3966090425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.6-falcon3-7b-instruct/5b934386-a0e9-437d-bf9e-a51074415a1e.json b/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.6-falcon3-7b-instruct/5b934386-a0e9-437d-bf9e-a51074415a1e.json deleted file mode 100644 index 38c76d178ee3f4de4612f280bf436ba3b44bf70c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_jessi-v0.6-falcon3-7b-instruct/5b934386-a0e9-437d-bf9e-a51074415a1e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/neopolita_jessi-v0.6-falcon3-7b-instruct/1762652580.391277", - "retrieved_timestamp": "1762652580.391277", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "neopolita/jessi-v0.6-falcon3-7b-instruct", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/jessi-v0.6-falcon3-7b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7401904723910335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5508818723957883 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564954682779456 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49042708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3956948138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_loki-v0.1-virtuoso/907047d7-1767-4009-8e04-02f5dc366355.json b/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_loki-v0.1-virtuoso/907047d7-1767-4009-8e04-02f5dc366355.json deleted file mode 100644 index 96a5075f87085e6b2c4d15070f9854003069fb8d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/neopolita/neopolita_loki-v0.1-virtuoso/907047d7-1767-4009-8e04-02f5dc366355.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/neopolita_loki-v0.1-virtuoso/1762652580.3914938", - "retrieved_timestamp": "1762652580.391495", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "neopolita/loki-v0.1-virtuoso", - "developer": "neopolita", - "inference_platform": "unknown", - "id": "neopolita/loki-v0.1-virtuoso" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7819308324135517 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6467251502613163 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3391238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35067114093959734 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5128823138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_DeepSeek-R1-MFANN-TIES-unretrained-7b/43da500e-cdc7-4b70-a0eb-6ae3371670d9.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_DeepSeek-R1-MFANN-TIES-unretrained-7b/43da500e-cdc7-4b70-a0eb-6ae3371670d9.json deleted file mode 100644 index 0e61ef544b86e3b655e8e3f1fee9d26ebfae1ff7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_DeepSeek-R1-MFANN-TIES-unretrained-7b/43da500e-cdc7-4b70-a0eb-6ae3371670d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_DeepSeek-R1-MFANN-TIES-unretrained-7b/1762652580.3919501", - "retrieved_timestamp": "1762652580.391951", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2586880587951081 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30859903405301287 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11452792553191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN-SFT/748c7e5a-697b-4763-a43e-e3b6a6f2951b.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN-SFT/748c7e5a-697b-4763-a43e-e3b6a6f2951b.json deleted file mode 100644 index a03535bc5a247aee54c1b67b99749423022bebf4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN-SFT/748c7e5a-697b-4763-a43e-e3b6a6f2951b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN-SFT/1762652580.393719", - "retrieved_timestamp": "1762652580.3937201", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN-SFT", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN-SFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36822298168858625 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.485188719488523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3725416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3336103723404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3b/c5913e2b-c8c7-4e8f-a1c3-f2f764c8478d.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3b/c5913e2b-c8c7-4e8f-a1c3-f2f764c8478d.json deleted file mode 100644 index 0afd635c81c3f7f6b51016f3f03c80c70cf930a8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3b/c5913e2b-c8c7-4e8f-a1c3-f2f764c8478d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3b/1762652580.395648", - "retrieved_timestamp": "1762652580.395648", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN3b", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2524435165361241 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4433128382028508 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36060416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23055186170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.15/ebdb6805-f14e-4fb9-b1c8-acd258b93385.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.15/ebdb6805-f14e-4fb9-b1c8-acd258b93385.json deleted file mode 100644 index 6e09a6ebc2007cc689319e47080955ec171ba0f8..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.15/ebdb6805-f14e-4fb9-b1c8-acd258b93385.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.15/1762652580.3958452", - "retrieved_timestamp": "1762652580.395846", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN3bv0.15", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv0.15" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2012105657433388 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.453931293669888 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3957916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24684175531914893 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.18/5b522625-39ed-4faa-a3f6-1cec01baf906.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.18/5b522625-39ed-4faa-a3f6-1cec01baf906.json deleted file mode 100644 index d77b9e4b8c32ea9129fae4112e2b218a2364fbca..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.18/5b522625-39ed-4faa-a3f6-1cec01baf906.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.18/1762652580.396076", - "retrieved_timestamp": "1762652580.396081", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - 
"model_info": { - "name": "netcat420/MFANN3bv0.18", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv0.18" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22064455644356973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4514366169824164 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40236458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.19/4207b373-ef5c-48f8-a463-814b81a44410.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.19/4207b373-ef5c-48f8-a463-814b81a44410.json deleted file mode 100644 index 7495594ea85946d425b09142e746935c0c87e3fe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.19/4207b373-ef5c-48f8-a463-814b81a44410.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.19/1762652580.396478", - "retrieved_timestamp": "1762652580.396479", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN3bv0.19", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv0.19" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22581528123157665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.4515800678058734 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40239583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25199468085106386 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.20/2d36210e-e2ca-41a8-9434-c29168849a28.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.20/2d36210e-e2ca-41a8-9434-c29168849a28.json deleted file mode 100644 index bd3227c9e033295e3f4ddbff1eedc7aab64fbef7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.20/2d36210e-e2ca-41a8-9434-c29168849a28.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.20/1762652580.3967948", - "retrieved_timestamp": "1762652580.396796", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN3bv0.20", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv0.20" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21934578030736224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4493365019423472 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4077291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.21/053f6333-9722-4c3e-a5bb-246b273225de.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.21/053f6333-9722-4c3e-a5bb-246b273225de.json deleted file mode 100644 index c76748100e98c38ef4b51e2df92ff3787e5c9ca1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.21/053f6333-9722-4c3e-a5bb-246b273225de.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.21/1762652580.397045", - "retrieved_timestamp": "1762652580.397046", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN3bv0.21", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv0.21" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1909189838517356 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44700236898039053 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03172205438066465 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37594791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23927859042553193 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.22/e551e936-41fa-4fda-84e9-dec9f5694c5d.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.22/e551e936-41fa-4fda-84e9-dec9f5694c5d.json deleted file mode 100644 index 25235c3dbacaf29814b28bc9dba766e88c7ad68d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.22/e551e936-41fa-4fda-84e9-dec9f5694c5d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.22/1762652580.39726", - "retrieved_timestamp": "1762652580.3972611", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN3bv0.22", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv0.22" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1979381374752324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44851095830051274 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35213541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2517453457446808 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.23/28396f73-b949-4db0-b685-77fc5901770b.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.23/28396f73-b949-4db0-b685-77fc5901770b.json deleted file mode 100644 index dbb9416bdf790ea8eea9f9448ad958d4ff3044bb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.23/28396f73-b949-4db0-b685-77fc5901770b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.23/1762652580.39747", - "retrieved_timestamp": "1762652580.397471", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN3bv0.23", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv0.23" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20480768804549704 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44954178056127364 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3427395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2417719414893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.24/0081cd67-9178-4443-aebf-721b75c0fc77.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.24/0081cd67-9178-4443-aebf-721b75c0fc77.json deleted file mode 100644 index 0119c1fd50bb5ac27e7621fa19a4b9ecd2f812da..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv0.24/0081cd67-9178-4443-aebf-721b75c0fc77.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv0.24/1762652580.397681", - "retrieved_timestamp": "1762652580.397682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN3bv0.24", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv0.24" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2200450360598767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4407346600666096 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3520729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23520611702127658 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv1.1/fb148468-c189-4fe5-b803-7532af8dec1d.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv1.1/fb148468-c189-4fe5-b803-7532af8dec1d.json deleted file mode 100644 index e88e28911bfc49b6902b8ba6efdb1f25d746ddf6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv1.1/fb148468-c189-4fe5-b803-7532af8dec1d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.1/1762652580.3978848", - "retrieved_timestamp": "1762652580.397886", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN3bv1.1", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2506948230694557 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3397086626022651 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3223125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11585771276595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.775 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv1.2/16b4d316-db1d-4282-a5c0-b8ffe4af817c.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv1.2/16b4d316-db1d-4282-a5c0-b8ffe4af817c.json deleted file mode 100644 index b6e07831b41f247152eb0430aaea69224ce0090c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv1.2/16b4d316-db1d-4282-a5c0-b8ffe4af817c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.2/1762652580.3980958", - "retrieved_timestamp": "1762652580.3980958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN3bv1.2", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2686050789682487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3659932511014956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31555208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14502992021276595 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.775 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv1.3/5981cb70-62a7-4e42-bf12-081c67c1b792.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv1.3/5981cb70-62a7-4e42-bf12-081c67c1b792.json deleted file mode 100644 index e4b9002a70b661429f6be2ad18a6ba66a933e568..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv1.3/5981cb70-62a7-4e42-bf12-081c67c1b792.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.3/1762652580.3983822", - "retrieved_timestamp": "1762652580.3983831", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN3bv1.3", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv1.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25466650709007654 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4456312489762861 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.329875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22755984042553193 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv1.4/426bdea2-83f2-4915-9e82-ba4c8c8f4224.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv1.4/426bdea2-83f2-4915-9e82-ba4c8c8f4224.json deleted file mode 100644 index 0d34b22ce488e13e48dfcde2b43d9e7ef76da340..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANN3bv1.4/426bdea2-83f2-4915-9e82-ba4c8c8f4224.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANN3bv1.4/1762652580.398614", - "retrieved_timestamp": "1762652580.3986151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANN3bv1.4", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANN3bv1.4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35243598097492435 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4808549324972969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3707708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2705285904255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.78 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.19/d2b0785d-a169-4773-a3fc-95b536fe3cc2.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.19/d2b0785d-a169-4773-a3fc-95b536fe3cc2.json deleted file mode 100644 index 4f8f4bb9800c649e7ad01071ddcc40178cbb2000..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.19/d2b0785d-a169-4773-a3fc-95b536fe3cc2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.19/1762652580.39887", - "retrieved_timestamp": "1762652580.39887", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - 
"name": "netcat420/MFANNv0.19", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANNv0.19" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30567449921763146 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47313832038755316 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35269791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24725731382978725 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.20/4c84cbc4-1a4d-45d9-909b-92d2b4e961b6.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.20/4c84cbc4-1a4d-45d9-909b-92d2b4e961b6.json deleted file mode 100644 index 69ea189bb52be8ad94efbd4230c9dc9a623beea0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.20/4c84cbc4-1a4d-45d9-909b-92d2b4e961b6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.20/1762652580.399081", - "retrieved_timestamp": "1762652580.399082", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANNv0.20", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANNv0.20" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34786477657061043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4574431878198548 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38739583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32022938829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.21/5d37ba65-09f6-4762-836e-4634c06ac9f7.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.21/5d37ba65-09f6-4762-836e-4634c06ac9f7.json deleted file mode 100644 index ef8d13071c14993ce15eafa0f0faa285467079c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.21/5d37ba65-09f6-4762-836e-4634c06ac9f7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.21/1762652580.399296", - "retrieved_timestamp": "1762652580.399297", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANNv0.21", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANNv0.21" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3233099287667832 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45763723048372523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3993333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3031083776595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.22.1/5009ba04-1a8d-4e91-bd32-659fe67c4d26.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.22.1/5009ba04-1a8d-4e91-bd32-659fe67c4d26.json deleted file mode 100644 index 6c2c90eb81a88f0efd78b9c7dc9aeedf2b0f308c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.22.1/5009ba04-1a8d-4e91-bd32-659fe67c4d26.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.22.1/1762652580.3995059", - "retrieved_timestamp": "1762652580.399507", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANNv0.22.1", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANNv0.22.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3089469274857378 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46608928527824584 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33427526595744683 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.23/f7b617fa-7095-4eef-88bb-4fd73c23d5dc.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.23/f7b617fa-7095-4eef-88bb-4fd73c23d5dc.json deleted file mode 100644 index 023c8582eb623b9ac4f683271754609b4bae51a3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.23/f7b617fa-7095-4eef-88bb-4fd73c23d5dc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.23/1762652580.3997262", - "retrieved_timestamp": "1762652580.399727", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANNv0.23", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANNv0.23" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3127435205255389 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4898102063834755 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04984894259818731 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33876329787234044 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.24/59e5fcd0-e46f-4346-b695-bee4dab9cfc4.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.24/59e5fcd0-e46f-4346-b695-bee4dab9cfc4.json deleted file mode 100644 index 0aabbd26bcecc1db7494c6196c755ddc4380d5d2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.24/59e5fcd0-e46f-4346-b695-bee4dab9cfc4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.24/1762652580.3999438", - "retrieved_timestamp": "1762652580.3999438", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANNv0.24", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANNv0.24" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162409074588758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.479027491915232 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3753958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3347739361702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.25/e94f28ff-ae6c-4109-96a2-9dbe07621e03.json b/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.25/e94f28ff-ae6c-4109-96a2-9dbe07621e03.json deleted file mode 100644 index b8e67229fdaa6a5ff23d96df093c2473930eb556..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netcat420/netcat420_MFANNv0.25/e94f28ff-ae6c-4109-96a2-9dbe07621e03.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netcat420_MFANNv0.25/1762652580.400151", - "retrieved_timestamp": "1762652580.400151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netcat420/MFANNv0.25", - "developer": "netcat420", - "inference_platform": "unknown", - "id": "netcat420/MFANNv0.25" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34666573580322435 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47940650861209216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36879166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33427526595744683 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/netease-youdao/netease-youdao_Confucius-o1-14B/ddd234e4-0665-4b36-943f-e99f0a293f50.json b/leaderboard_data/HFOpenLLMv2/netease-youdao/netease-youdao_Confucius-o1-14B/ddd234e4-0665-4b36-943f-e99f0a293f50.json deleted file mode 100644 index a34f532e2199ee7c093256ccd4a6a277fb19ea1b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/netease-youdao/netease-youdao_Confucius-o1-14B/ddd234e4-0665-4b36-943f-e99f0a293f50.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/netease-youdao_Confucius-o1-14B/1762652580.4025002", - "retrieved_timestamp": "1762652580.402501", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "netease-youdao/Confucius-o1-14B", - "developer": "netease-youdao", - "inference_platform": "unknown", - "id": "netease-youdao/Confucius-o1-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6378497941018719 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6299772409698484 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.4312688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3649328859060403 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4338125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5265126329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/newsbang/newsbang_Homer-7B-v0.1/af9ae4eb-2fdf-414a-8585-4f0f894a6a49.json b/leaderboard_data/HFOpenLLMv2/newsbang/newsbang_Homer-7B-v0.1/af9ae4eb-2fdf-414a-8585-4f0f894a6a49.json deleted file mode 100644 index 62fd48ad5dc6ba6e2125e720411f6b1a5f1ec006..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/newsbang/newsbang_Homer-7B-v0.1/af9ae4eb-2fdf-414a-8585-4f0f894a6a49.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/newsbang_Homer-7B-v0.1/1762652580.402741", - "retrieved_timestamp": "1762652580.402742", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "newsbang/Homer-7B-v0.1", - "developer": "newsbang", - "inference_platform": "unknown", - "id": "newsbang/Homer-7B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6108724850064495 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5601389961416444 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859516616314199 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43569791666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { 
- "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4474734042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/newsbang/newsbang_Homer-7B-v0.2/d7964788-36a6-4b86-add6-cd8a1a42eb7c.json b/leaderboard_data/HFOpenLLMv2/newsbang/newsbang_Homer-7B-v0.2/d7964788-36a6-4b86-add6-cd8a1a42eb7c.json deleted file mode 100644 index e76baf090418994621d80fb095850549277389e6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/newsbang/newsbang_Homer-7B-v0.2/d7964788-36a6-4b86-add6-cd8a1a42eb7c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/newsbang_Homer-7B-v0.2/1762652580.403213", - "retrieved_timestamp": "1762652580.4032168", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "newsbang/Homer-7B-v0.2", - "developer": "newsbang", - "inference_platform": "unknown", - "id": "newsbang/Homer-7B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7493827488840721 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5517330182832224 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24773413897280966 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33221476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4409906914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nhyha/nhyha_N3N_Delirium-v1_1030_0227/5128233e-41be-4e26-9ec2-2b7926c66b7c.json b/leaderboard_data/HFOpenLLMv2/nhyha/nhyha_N3N_Delirium-v1_1030_0227/5128233e-41be-4e26-9ec2-2b7926c66b7c.json deleted file mode 100644 index 
9783c6a10a7954e48118c255b45dc0eb7d0ba532..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nhyha/nhyha_N3N_Delirium-v1_1030_0227/5128233e-41be-4e26-9ec2-2b7926c66b7c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nhyha_N3N_Delirium-v1_1030_0227/1762652580.4055", - "retrieved_timestamp": "1762652580.4055", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nhyha/N3N_Delirium-v1_1030_0227", - "developer": "nhyha", - "inference_platform": "unknown", - "id": "nhyha/N3N_Delirium-v1_1030_0227" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8022890375315275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5890686677822234 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2107250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40981249999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41497672872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nhyha/nhyha_N3N_Llama-3.1-8B-Instruct_1028_0216/928f9cd0-ce0f-43f7-aa5f-be9cbf4d91cd.json b/leaderboard_data/HFOpenLLMv2/nhyha/nhyha_N3N_Llama-3.1-8B-Instruct_1028_0216/928f9cd0-ce0f-43f7-aa5f-be9cbf4d91cd.json deleted file mode 100644 index 18b79c26b215bcd97a78396b5236bea45175b26d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nhyha/nhyha_N3N_Llama-3.1-8B-Instruct_1028_0216/928f9cd0-ce0f-43f7-aa5f-be9cbf4d91cd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nhyha_N3N_Llama-3.1-8B-Instruct_1028_0216/1762652580.405756", - "retrieved_timestamp": "1762652580.405757", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open 
LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216", - "developer": "nhyha", - "inference_platform": "unknown", - "id": "nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4796063334175543 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5053741309920361 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40503125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36377992021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nhyha/nhyha_merge_Qwen2.5-7B-Instruct_20241023_0314/eb608d79-545a-4cc2-8d28-e539a3af7f17.json b/leaderboard_data/HFOpenLLMv2/nhyha/nhyha_merge_Qwen2.5-7B-Instruct_20241023_0314/eb608d79-545a-4cc2-8d28-e539a3af7f17.json deleted file mode 100644 index 999de38f0d856afdff4bac6023ba8de413df018e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nhyha/nhyha_merge_Qwen2.5-7B-Instruct_20241023_0314/eb608d79-545a-4cc2-8d28-e539a3af7f17.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nhyha_merge_Qwen2.5-7B-Instruct_20241023_0314/1762652580.406431", - "retrieved_timestamp": "1762652580.406431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314", - "developer": "nhyha", - "inference_platform": "unknown", - "id": "nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5694568190179834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5558529241660143 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3542296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42506249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45420545212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_Lion-Lamarck-v.1.0.8/67582e10-cebf-4938-bfca-2eb6883e2c39.json b/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_Lion-Lamarck-v.1.0.8/67582e10-cebf-4938-bfca-2eb6883e2c39.json deleted file mode 100644 index f511adeb625c2c1f5fa96e15560916c35941597f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_Lion-Lamarck-v.1.0.8/67582e10-cebf-4938-bfca-2eb6883e2c39.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nlpguy_Lion-Lamarck-v.1.0.8/1762652580.40752", - "retrieved_timestamp": "1762652580.407521", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nlpguy/Lion-Lamarck-v.1.0.8", - "developer": "nlpguy", - "inference_platform": "unknown", - "id": "nlpguy/Lion-Lamarck-v.1.0.8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45090471061228654 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5868930914775694 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.554380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35822147651006714 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4672708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46434507978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_Lion-Lamarck-v.1.0.9/f5fa6816-051d-4d86-bef5-ba9731b8bd9a.json b/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_Lion-Lamarck-v.1.0.9/f5fa6816-051d-4d86-bef5-ba9731b8bd9a.json deleted file mode 100644 index 5ca077e0eec270921aca806d4df3e18e2679f861..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_Lion-Lamarck-v.1.0.9/f5fa6816-051d-4d86-bef5-ba9731b8bd9a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nlpguy_Lion-Lamarck-v.1.0.9/1762652580.407768", - "retrieved_timestamp": "1762652580.4077692", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nlpguy/Lion-Lamarck-v.1.0.9", - "developer": "nlpguy", - "inference_platform": "unknown", - "id": "nlpguy/Lion-Lamarck-v.1.0.9" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34089549063152436 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5918237099420903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5641993957703928 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3901006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5299583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47041223404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_Lion-Lamarck-v.1.1.0/027ad81a-1271-4c25-9966-02370f6ee49d.json b/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_Lion-Lamarck-v.1.1.0/027ad81a-1271-4c25-9966-02370f6ee49d.json deleted file mode 100644 index 69884e4bd3e7e4917b58ead4ab7b143b2fa80927..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_Lion-Lamarck-v.1.1.0/027ad81a-1271-4c25-9966-02370f6ee49d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nlpguy_Lion-Lamarck-v.1.1.0/1762652580.4079711", - "retrieved_timestamp": "1762652580.4079711", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nlpguy/Lion-Lamarck-v.1.1.0", - "developer": "nlpguy", - "inference_platform": "unknown", - "id": "nlpguy/Lion-Lamarck-v.1.1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3657750324694034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5962460968547941 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.53253125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4630984042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_Miisce-one/e557a750-53b2-4181-a19c-dfdeee11ee61.json b/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_Miisce-one/e557a750-53b2-4181-a19c-dfdeee11ee61.json deleted file mode 100644 index 
419da1c6cacdd2d7ede0183a081d1b41bc87393a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_Miisce-one/e557a750-53b2-4181-a19c-dfdeee11ee61.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nlpguy_Miisce-one/1762652580.4081762", - "retrieved_timestamp": "1762652580.408177", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nlpguy/Miisce-one", - "developer": "nlpguy", - "inference_platform": "unknown", - "id": "nlpguy/Miisce-one" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6065761069517768 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6504562869685913 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4169184290030212 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48198958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5412234042553191 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_StableProse/bedab076-13e7-468a-b8e8-dddb57d78583.json b/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_StableProse/bedab076-13e7-468a-b8e8-dddb57d78583.json deleted file mode 100644 index c19960b914965f1efac9cf91b46a2b5673b849e9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_StableProse/bedab076-13e7-468a-b8e8-dddb57d78583.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nlpguy_StableProse/1762652580.40907", - "retrieved_timestamp": "1762652580.40907", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nlpguy/StableProse", - "developer": "nlpguy", - "inference_platform": "unknown", - "id": "nlpguy/StableProse" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19723888172271792 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5116558625577087 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4067083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3468251329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_StarFusion-alpha1/1d5c35ef-ec57-42a3-8459-6db62627c6d2.json b/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_StarFusion-alpha1/1d5c35ef-ec57-42a3-8459-6db62627c6d2.json deleted file mode 100644 index f38d0acbefea5c0b3b2f9a62657b15ba4d505aa7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nlpguy/nlpguy_StarFusion-alpha1/1d5c35ef-ec57-42a3-8459-6db62627c6d2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nlpguy_StarFusion-alpha1/1762652580.409272", - "retrieved_timestamp": "1762652580.409272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nlpguy/StarFusion-alpha1", - "developer": "nlpguy", - "inference_platform": "unknown", - "id": "nlpguy/StarFusion-alpha1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5660092997690572 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4428694115507034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40810416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3190658244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/noname0202/noname0202_Llama-3.2-4x3B-Instruct/e9511b0a-1083-4a0d-a9e0-97efcfc0891e.json b/leaderboard_data/HFOpenLLMv2/noname0202/noname0202_Llama-3.2-4x3B-Instruct/e9511b0a-1083-4a0d-a9e0-97efcfc0891e.json deleted file mode 100644 index 94770a093d527813c9f2ca7370d525a948e3f285..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/noname0202/noname0202_Llama-3.2-4x3B-Instruct/e9511b0a-1083-4a0d-a9e0-97efcfc0891e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/noname0202_Llama-3.2-4x3B-Instruct/1762652580.409481", - "retrieved_timestamp": "1762652580.409481", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "noname0202/Llama-3.2-4x3B-Instruct", - "developer": "noname0202", - "inference_platform": "unknown", - "id": "noname0202/Llama-3.2-4x3B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7067181744438091 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4647311192852755 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15861027190332327 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36739583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3285405585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 9.949 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/noname0202/noname0202_gemma-2-9b-sft-jp-en-zh-v1/b32d34eb-14b5-410a-8772-041d40ca73b8.json b/leaderboard_data/HFOpenLLMv2/noname0202/noname0202_gemma-2-9b-sft-jp-en-zh-v1/b32d34eb-14b5-410a-8772-041d40ca73b8.json deleted file mode 100644 index d94a56fcced6eafeb4818df88cac10aac24a2e24..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/noname0202/noname0202_gemma-2-9b-sft-jp-en-zh-v1/b32d34eb-14b5-410a-8772-041d40ca73b8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/noname0202_gemma-2-9b-sft-jp-en-zh-v1/1762652580.410035", - "retrieved_timestamp": "1762652580.410036", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "noname0202/gemma-2-9b-sft-jp-en-zh-v1", - "developer": "noname0202", - "inference_platform": "unknown", - "id": "noname0202/gemma-2-9b-sft-jp-en-zh-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29880494864736673 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4519290530910057 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40801041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.3125 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/noname0202/noname0202_gemma-2-9b-sft-jp-en-zh-v2/ee687c56-a9b4-4205-866b-b3067c066992.json b/leaderboard_data/HFOpenLLMv2/noname0202/noname0202_gemma-2-9b-sft-jp-en-zh-v2/ee687c56-a9b4-4205-866b-b3067c066992.json deleted file mode 100644 index f5a4d275aeeb913b8247329ab9716bb5f92be4d9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/noname0202/noname0202_gemma-2-9b-sft-jp-en-zh-v2/ee687c56-a9b4-4205-866b-b3067c066992.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/noname0202_gemma-2-9b-sft-jp-en-zh-v2/1762652580.4102452", - "retrieved_timestamp": "1762652580.4102452", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "noname0202/gemma-2-9b-sft-jp-en-zh-v2", - "developer": "noname0202", - "inference_platform": "unknown", - "id": "noname0202/gemma-2-9b-sft-jp-en-zh-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3993470657854493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4515041184509401 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36115625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36751994680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/notbdq/notbdq_Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/7e0f008e-4327-4ee0-a810-b5564b651233.json b/leaderboard_data/HFOpenLLMv2/notbdq/notbdq_Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/7e0f008e-4327-4ee0-a810-b5564b651233.json deleted file mode 100644 index 
185170d21f32ee0a36ea17928da932d9c6dc068e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/notbdq/notbdq_Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/7e0f008e-4327-4ee0-a810-b5564b651233.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/notbdq_Qwen2.5-14B-Instruct-1M-GRPO-Reasoning/1762652580.4113228", - "retrieved_timestamp": "1762652580.4113238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning", - "developer": "notbdq", - "inference_platform": "unknown", - "id": "notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8413564896696322 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6198222551365405 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302114803625377 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.418 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4849567819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nothingiisreal/nothingiisreal_L3.1-8B-Celeste-V1.5/5b7a80ce-0fb2-4fb8-9381-184d7a434706.json b/leaderboard_data/HFOpenLLMv2/nothingiisreal/nothingiisreal_L3.1-8B-Celeste-V1.5/5b7a80ce-0fb2-4fb8-9381-184d7a434706.json deleted file mode 100644 index 5e6e3438e8a19e88c8d127b59acde932029c58ae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nothingiisreal/nothingiisreal_L3.1-8B-Celeste-V1.5/5b7a80ce-0fb2-4fb8-9381-184d7a434706.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nothingiisreal_L3.1-8B-Celeste-V1.5/1762652580.4115741", - "retrieved_timestamp": "1762652580.411575", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nothingiisreal/L3.1-8B-Celeste-V1.5", - "developer": "nothingiisreal", - "inference_platform": "unknown", - "id": "nothingiisreal/L3.1-8B-Celeste-V1.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7326715337526651 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5011796822721141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37486458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37042885638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nothingiisreal/nothingiisreal_MN-12B-Starcannon-v2/1ff70031-dbe8-467a-9dbd-9fd789b9841b.json b/leaderboard_data/HFOpenLLMv2/nothingiisreal/nothingiisreal_MN-12B-Starcannon-v2/1ff70031-dbe8-467a-9dbd-9fd789b9841b.json deleted file mode 100644 index f9446fa20ecd4767851ec4c1f4de73027de84011..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nothingiisreal/nothingiisreal_MN-12B-Starcannon-v2/1ff70031-dbe8-467a-9dbd-9fd789b9841b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nothingiisreal_MN-12B-Starcannon-v2/1762652580.411832", - "retrieved_timestamp": "1762652580.411832", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nothingiisreal/MN-12B-Starcannon-v2", - "developer": "nothingiisreal", - "inference_platform": "unknown", - "id": "nothingiisreal/MN-12B-Starcannon-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on 
IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3925273828995953 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5004499888471767 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05966767371601209 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39781249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31283244680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nothingiisreal/nothingiisreal_MN-12B-Starcannon-v3/633a786a-fe99-4a6e-b402-888e36e8b6c9.json b/leaderboard_data/HFOpenLLMv2/nothingiisreal/nothingiisreal_MN-12B-Starcannon-v3/633a786a-fe99-4a6e-b402-888e36e8b6c9.json deleted file mode 100644 index fc4c4e5ee632db2ee1a8c198cbf2e37ebf7af866..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nothingiisreal/nothingiisreal_MN-12B-Starcannon-v3/633a786a-fe99-4a6e-b402-888e36e8b6c9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nothingiisreal_MN-12B-Starcannon-v3/1762652580.412042", - "retrieved_timestamp": "1762652580.412042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nothingiisreal/MN-12B-Starcannon-v3", - "developer": "nothingiisreal", - "inference_platform": "unknown", - "id": "nothingiisreal/MN-12B-Starcannon-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38073755413414184 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5170553444795719 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40463541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32646276595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceInstruct-1.5B/a26b4b3f-aad1-4d2f-a97a-bf24850a3092.json b/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceInstruct-1.5B/a26b4b3f-aad1-4d2f-a97a-bf24850a3092.json deleted file mode 100644 index 2e28f324ea4d7456e2a775d8b67298030b85e231..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceInstruct-1.5B/a26b4b3f-aad1-4d2f-a97a-bf24850a3092.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_AceInstruct-1.5B/1762652580.412246", - "retrieved_timestamp": "1762652580.412247", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/AceInstruct-1.5B", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceInstruct-1.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3947758613811354 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3931958135346713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.34600000000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2573969414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceInstruct-72B/08e924b1-121c-4ff7-bf1d-06b9cb90c7c0.json b/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceInstruct-72B/08e924b1-121c-4ff7-bf1d-06b9cb90c7c0.json deleted file mode 100644 index 8531d2f1b47a82de7364e7e692b689fe2069bf94..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceInstruct-72B/08e924b1-121c-4ff7-bf1d-06b9cb90c7c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_AceInstruct-72B/1762652580.4124959", - "retrieved_timestamp": "1762652580.4124968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/AceInstruct-72B", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceInstruct-72B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.711888899231816 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6139041785911337 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6261329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3213087248322148 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42060416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48736702127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceInstruct-7B/d0680660-92e5-471b-a4c9-2658e7c59dd0.json 
b/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceInstruct-7B/d0680660-92e5-471b-a4c9-2658e7c59dd0.json deleted file mode 100644 index 001f18cf0c0e153d07e164afeceb434c16362ef4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceInstruct-7B/d0680660-92e5-471b-a4c9-2658e7c59dd0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_AceInstruct-7B/1762652580.412692", - "retrieved_timestamp": "1762652580.412693", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/AceInstruct-7B", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceInstruct-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5422290633297429 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.550118130896558 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294561933534743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4255 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.417719414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceMath-1.5B-Instruct/8584e2c5-dd32-4cd0-9089-1b4e17a1ffac.json b/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceMath-1.5B-Instruct/8584e2c5-dd32-4cd0-9089-1b4e17a1ffac.json deleted file mode 100644 index 635213ef4349d83af10e9cc8d0bcb26f2c394729..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceMath-1.5B-Instruct/8584e2c5-dd32-4cd0-9089-1b4e17a1ffac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_AceMath-1.5B-Instruct/1762652580.412895", - "retrieved_timestamp": "1762652580.412896", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/AceMath-1.5B-Instruct", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceMath-1.5B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32123654126606294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4024301274933693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3606979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20636635638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceMath-72B-Instruct/4ba1027b-f0c1-4ed9-aa30-35c4e01e564d.json b/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceMath-72B-Instruct/4ba1027b-f0c1-4ed9-aa30-35c4e01e564d.json deleted file mode 100644 index 2fc5d25d03df631d77aa94799c0f8776d8f069ee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceMath-72B-Instruct/4ba1027b-f0c1-4ed9-aa30-35c4e01e564d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_AceMath-72B-Instruct/1762652580.413093", - "retrieved_timestamp": "1762652580.4130938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/AceMath-72B-Instruct", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceMath-72B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.494993284485166 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.640215611099268 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7145015105740181 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40615625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44107380319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceMath-72B-RM/5fdd0c8f-3393-4b59-8cc1-511c524c493a.json b/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceMath-72B-RM/5fdd0c8f-3393-4b59-8cc1-511c524c493a.json deleted file mode 100644 index 3531e45ca62aa20eef4787ac819678f83a84fbdb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceMath-72B-RM/5fdd0c8f-3393-4b59-8cc1-511c524c493a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_AceMath-72B-RM/1762652580.413297", - "retrieved_timestamp": "1762652580.413298", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/AceMath-72B-RM", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceMath-72B-RM" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14125963554479892 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2717426350897727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23406040268456377 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11785239361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForSequenceClassification", - "params_billions": 71.461 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceMath-7B-Instruct/e1c94d59-dfa4-49cf-9052-9ce6e713a0be.json b/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceMath-7B-Instruct/e1c94d59-dfa4-49cf-9052-9ce6e713a0be.json deleted file mode 100644 index 3ed0e275a62d3ad916d70c1aba3e30758ecc0db2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceMath-7B-Instruct/e1c94d59-dfa4-49cf-9052-9ce6e713a0be.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_AceMath-7B-Instruct/1762652580.413503", - "retrieved_timestamp": "1762652580.413504", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/AceMath-7B-Instruct", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceMath-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45317756885064964 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49938547326244365 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6336858006042296 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4192708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.33834773936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceMath-7B-RM/ab9c685d-7b97-4bf4-bc0e-ffd5666e35d9.json b/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceMath-7B-RM/ab9c685d-7b97-4bf4-bc0e-ffd5666e35d9.json deleted file mode 100644 index 7dfd64f2b44dd1326e63daca320a9d220d54c402..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_AceMath-7B-RM/ab9c685d-7b97-4bf4-bc0e-ffd5666e35d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_AceMath-7B-RM/1762652580.4138508", - "retrieved_timestamp": "1762652580.413853", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/AceMath-7B-RM", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/AceMath-7B-RM" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14937809456686035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2422689292768334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35800000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11386303191489362 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForSequenceClassification", - "params_billions": 7.071 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Hymba-1.5B-Base/89f9149f-1f6d-4389-819a-d958b0ecc6b8.json b/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Hymba-1.5B-Base/89f9149f-1f6d-4389-819a-d958b0ecc6b8.json deleted file mode 100644 index 1f14634935be35ee3b40a7c2bed57d965a410575..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Hymba-1.5B-Base/89f9149f-1f6d-4389-819a-d958b0ecc6b8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_Hymba-1.5B-Base/1762652580.4142", - "retrieved_timestamp": "1762652580.4142022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/Hymba-1.5B-Base", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/Hymba-1.5B-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2295121389025563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32564785214182224 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3566354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19223736702127658 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "HymbaForCausalLM", - "params_billions": 1.523 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Hymba-1.5B-Instruct/ae6e9c29-eb12-4dd5-bdbc-e84b499cf40f.json b/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Hymba-1.5B-Instruct/ae6e9c29-eb12-4dd5-bdbc-e84b499cf40f.json deleted file mode 100644 index fbab24f93dec6c9f7872a81d204ae76e0e50e0bb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Hymba-1.5B-Instruct/ae6e9c29-eb12-4dd5-bdbc-e84b499cf40f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_Hymba-1.5B-Instruct/1762652580.414529", - "retrieved_timestamp": "1762652580.41453", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - 
"model_info": { - "name": "nvidia/Hymba-1.5B-Instruct", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/Hymba-1.5B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6009055971488984 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3067133908231881 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33158333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20403922872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "HymbaForCausalLM", - "params_billions": 1.523 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Llama-3.1-Nemotron-70B-Instruct-HF/2366b5e1-0a56-4d6e-83e6-12f12eca3ec4.json b/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Llama-3.1-Nemotron-70B-Instruct-HF/2366b5e1-0a56-4d6e-83e6-12f12eca3ec4.json deleted file mode 100644 index 6de7c4561b154238b3e4ebba2c05fd564c8d4ed6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Llama-3.1-Nemotron-70B-Instruct-HF/2366b5e1-0a56-4d6e-83e6-12f12eca3ec4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_Llama-3.1-Nemotron-70B-Instruct-HF/1762652580.415039", - "retrieved_timestamp": "1762652580.41504", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7380672172059026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6316000668895038 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42673716012084595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4327604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49185505319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Minitron-4B-Base/f5e52953-2dfc-4661-81cd-ed96d7a52482.json b/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Minitron-4B-Base/f5e52953-2dfc-4661-81cd-ed96d7a52482.json deleted file mode 100644 index e74b74014fd6facccfaba403b857e26d37316f75..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Minitron-4B-Base/f5e52953-2dfc-4661-81cd-ed96d7a52482.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_Minitron-4B-Base/1762652580.415251", - "retrieved_timestamp": "1762652580.415252", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/Minitron-4B-Base", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/Minitron-4B-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2217937295265451 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4083876243992497 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.413375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.261968085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "NemotronForCausalLM", - "params_billions": 4.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Minitron-8B-Base/3f6ec864-adf4-422f-85c1-19ef2417489a.json b/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Minitron-8B-Base/3f6ec864-adf4-422f-85c1-19ef2417489a.json deleted file mode 100644 index 2527591f6c39ef4a6cf34f37da9c6f4d2536aa8b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Minitron-8B-Base/3f6ec864-adf4-422f-85c1-19ef2417489a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_Minitron-8B-Base/1762652580.415456", - "retrieved_timestamp": "1762652580.415456", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/Minitron-8B-Base", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/Minitron-8B-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24242676099416216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43950631883576047 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40255208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31806848404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "NemotronForCausalLM", - "params_billions": 7.22 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Mistral-NeMo-Minitron-8B-Instruct/f4c299f0-d957-4784-8512-23f72a26a095.json b/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Mistral-NeMo-Minitron-8B-Instruct/f4c299f0-d957-4784-8512-23f72a26a095.json deleted file mode 100644 index c7ec6c0a2a85e646bd4b06c5d4916f33a5b55d98..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Mistral-NeMo-Minitron-8B-Instruct/f4c299f0-d957-4784-8512-23f72a26a095.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_Mistral-NeMo-Minitron-8B-Instruct/1762652580.415967", - "retrieved_timestamp": "1762652580.415968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/Mistral-NeMo-Minitron-8B-Instruct", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/Mistral-NeMo-Minitron-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5003889679384035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5320919605840294 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38857291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39910239361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.414 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Nemotron-Mini-4B-Instruct/ab7ee3ac-4d47-4ec6-a2af-8a6f7eb96684.json b/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Nemotron-Mini-4B-Instruct/ab7ee3ac-4d47-4ec6-a2af-8a6f7eb96684.json deleted file mode 100644 index 66614dbb7cf4126867d9ca6dced6794f1049f6c1..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/nvidia/nvidia_Nemotron-Mini-4B-Instruct/ab7ee3ac-4d47-4ec6-a2af-8a6f7eb96684.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nvidia_Nemotron-Mini-4B-Instruct/1762652580.41618", - "retrieved_timestamp": "1762652580.416181", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "nvidia/Nemotron-Mini-4B-Instruct", - "developer": "nvidia", - "inference_platform": "unknown", - "id": "nvidia/Nemotron-Mini-4B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6668761109411916 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3864840798591535 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3767291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26263297872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "NemotronForCausalLM", - "params_billions": 4.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/nxmwxm/nxmwxm_Beast-Soul-new/4ae25fa0-54af-4f47-853f-c97cd7b312d3.json b/leaderboard_data/HFOpenLLMv2/nxmwxm/nxmwxm_Beast-Soul-new/4ae25fa0-54af-4f47-853f-c97cd7b312d3.json deleted file mode 100644 index da6aaea8d981af364b030d27f4554f66171d15ce..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/nxmwxm/nxmwxm_Beast-Soul-new/4ae25fa0-54af-4f47-853f-c97cd7b312d3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/nxmwxm_Beast-Soul-new/1762652580.416598", - "retrieved_timestamp": "1762652580.416599", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "nxmwxm/Beast-Soul-new", - "developer": "nxmwxm", - "inference_platform": "unknown", - "id": "nxmwxm/Beast-Soul-new" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48687482546310457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5227143628884523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4459270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3101728723404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/occiglot/occiglot_occiglot-7b-es-en-instruct/4207b47d-711c-4af8-9c70-becb270973eb.json b/leaderboard_data/HFOpenLLMv2/occiglot/occiglot_occiglot-7b-es-en-instruct/4207b47d-711c-4af8-9c70-becb270973eb.json deleted file mode 100644 index 4bdcf1e8c515f7f45bc6c3393ddb22f05b366951..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/occiglot/occiglot_occiglot-7b-es-en-instruct/4207b47d-711c-4af8-9c70-becb270973eb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/occiglot_occiglot-7b-es-en-instruct/1762652580.416852", - "retrieved_timestamp": "1762652580.416853", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "occiglot/occiglot-7b-es-en-instruct", - "developer": "occiglot", - "inference_platform": "unknown", - "id": "occiglot/occiglot-7b-es-en-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3485141646387142 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4110970229781084 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2310505319148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/odyssey-labs/odyssey-labs_Astral-1-10B/4fefa5ae-d421-4883-b734-d6cc8bd8f4d6.json b/leaderboard_data/HFOpenLLMv2/odyssey-labs/odyssey-labs_Astral-1-10B/4fefa5ae-d421-4883-b734-d6cc8bd8f4d6.json deleted file mode 100644 index 12a3cf0e45ef9e0f57b1ef3bc031d072e8168a16..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/odyssey-labs/odyssey-labs_Astral-1-10B/4fefa5ae-d421-4883-b734-d6cc8bd8f4d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/odyssey-labs_Astral-1-10B/1762652580.417092", - "retrieved_timestamp": "1762652580.417093", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "odyssey-labs/Astral-1-10B", - "developer": "odyssey-labs", - "inference_platform": "unknown", - "id": "odyssey-labs/Astral-1-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38780657544204933 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4872563924334199 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42797916666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29853723404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/olabs-ai/olabs-ai_reflection_model/84b63639-3343-4568-9fa7-d353ccb5b465.json b/leaderboard_data/HFOpenLLMv2/olabs-ai/olabs-ai_reflection_model/84b63639-3343-4568-9fa7-d353ccb5b465.json deleted file mode 100644 index ac49b94ad309087ee1e73c78c414ac41e977eca5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/olabs-ai/olabs-ai_reflection_model/84b63639-3343-4568-9fa7-d353ccb5b465.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/olabs-ai_reflection_model/1762652580.417324", - "retrieved_timestamp": "1762652580.417325", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "olabs-ai/reflection_model", - "developer": "olabs-ai", - "inference_platform": "unknown", - "id": "olabs-ai/reflection_model" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15986914719610634 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4712508645838735 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35083333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33111702127659576 - } - } - ], - 
"additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 9.3 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_RedPajama-3B-v1-AutoRedteam-Harmless-only/8b50fd5a-9f95-4213-98e2-ee66e1602cdf.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_RedPajama-3B-v1-AutoRedteam-Harmless-only/8b50fd5a-9f95-4213-98e2-ee66e1602cdf.json deleted file mode 100644 index b0ee964113b9ef6ef26fe8ba38ed0144fc53fe46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_RedPajama-3B-v1-AutoRedteam-Harmless-only/8b50fd5a-9f95-4213-98e2-ee66e1602cdf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_RedPajama-3B-v1-AutoRedteam-Harmless-only/1762652580.418057", - "retrieved_timestamp": "1762652580.418057", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.152475431854147 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3123669789182832 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23154362416107382 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10995678191489362 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 2.776 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_RedPajama-3B-v1-AutoRedteam/9f85efe5-9fe1-4ad3-9438-da4dbf886f9d.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_RedPajama-3B-v1-AutoRedteam/9f85efe5-9fe1-4ad3-9438-da4dbf886f9d.json deleted file mode 100644 index 
969e9d62e6192c2061e180a51d3f35de7e54175c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_RedPajama-3B-v1-AutoRedteam/9f85efe5-9fe1-4ad3-9438-da4dbf886f9d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_RedPajama-3B-v1-AutoRedteam/1762652580.4178078", - "retrieved_timestamp": "1762652580.4178078", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/RedPajama-3B-v1-AutoRedteam", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/RedPajama-3B-v1-AutoRedteam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13434021729012352 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30256825198631376 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2424496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36606249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1107878989361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 2.776 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_RedPajama3b_v1-autoredteam_helpfulness-train/d070a397-6bd5-4407-b030-aecdc31eb47c.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_RedPajama3b_v1-autoredteam_helpfulness-train/d070a397-6bd5-4407-b030-aecdc31eb47c.json deleted file mode 100644 index 29c46b62f0dec8fced266ca9e67ff8fa3665c735..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_RedPajama3b_v1-autoredteam_helpfulness-train/d070a397-6bd5-4407-b030-aecdc31eb47c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_RedPajama3b_v1-autoredteam_helpfulness-train/1762652580.4182642", - "retrieved_timestamp": "1762652580.418265", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/RedPajama3b_v1-autoredteam_helpfulness-train", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/RedPajama3b_v1-autoredteam_helpfulness-train" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2847666414003732 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30927408550278385 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11070478723404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 2.776 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_merged_0.2_expert_0.8-stack_2x/a0cdb8e9-7920-41eb-864d-9995c3168277.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_merged_0.2_expert_0.8-stack_2x/a0cdb8e9-7920-41eb-864d-9995c3168277.json deleted file mode 100644 index f590427b2b476211842d33b3024573df410f9475..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_merged_0.2_expert_0.8-stack_2x/a0cdb8e9-7920-41eb-864d-9995c3168277.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_merged_0.2_expert_0.8-stack_2x/1762652580.418678", - "retrieved_timestamp": "1762652580.418679", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/merged_0.2_expert_0.8-stack_2x", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/merged_0.2_expert_0.8-stack_2x" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17960345217356613 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30061312694162695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11028922872340426 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 6.512 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_merged_0.2_expert_0.8/c373de55-1c2e-4cd5-a0e9-ec462f80010f.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_merged_0.2_expert_0.8/c373de55-1c2e-4cd5-a0e9-ec462f80010f.json deleted file mode 100644 index 28b2ad5319fc86e9a828ea9eaa4536a829e6d03c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_merged_0.2_expert_0.8/c373de55-1c2e-4cd5-a0e9-ec462f80010f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_merged_0.2_expert_0.8/1762652580.418474", - "retrieved_timestamp": "1762652580.418475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/merged_0.2_expert_0.8", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/merged_0.2_expert_0.8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17425763640473943 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3046000784127159 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { 
- "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36206249999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_merged_0.5_expert_0.5/d3dccfbc-ccc3-4d7c-abe3-4669c8efca3b.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_merged_0.5_expert_0.5/d3dccfbc-ccc3-4d7c-abe3-4669c8efca3b.json deleted file mode 100644 index d7bf52dac4394e27cd73922d76fb7e3c2a731fd5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_merged_0.5_expert_0.5/d3dccfbc-ccc3-4d7c-abe3-4669c8efca3b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_merged_0.5_expert_0.5/1762652580.418875", - "retrieved_timestamp": "1762652580.418876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/merged_0.5_expert_0.5", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/merged_0.5_expert_0.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1787291054402319 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3017011118802398 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35424999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1107878989361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/93164a9c-187c-45eb-94e0-12910b6ebd9d.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/93164a9c-187c-45eb-94e0-12910b6ebd9d.json deleted file mode 100644 index 8a1a8d0edfb76d7bf734b1a6dc26bfc2a547317a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/93164a9c-187c-45eb-94e0-12910b6ebd9d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful/1762652580.419096", - "retrieved_timestamp": "1762652580.419096", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13184240038652995 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3004467893724157 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36311458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - 
"metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11419547872340426 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_ontocord_wide_7b-stacked-stage1-instruct/92e8e4af-bdfd-4fb3-8b25-b7b88470c56c.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_ontocord_wide_7b-stacked-stage1-instruct/92e8e4af-bdfd-4fb3-8b25-b7b88470c56c.json deleted file mode 100644 index 8e3b9cf0dc786cea335101f01c8096c4fe8dc15c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_ontocord_wide_7b-stacked-stage1-instruct/92e8e4af-bdfd-4fb3-8b25-b7b88470c56c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_ontocord_wide_7b-stacked-stage1-instruct/1762652580.4195461", - "retrieved_timestamp": "1762652580.4195468", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/ontocord_wide_7b-stacked-stage1-instruct", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/ontocord_wide_7b-stacked-stage1-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15302508455342934 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2853913447506418 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11170212765957446 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.888 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_ontocord_wide_7b-stacked-stage1/8098c6f4-c2a4-44d9-92b5-72dfccd83395.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_ontocord_wide_7b-stacked-stage1/8098c6f4-c2a4-44d9-92b5-72dfccd83395.json deleted file mode 100644 index d1965251278e3ee7fb10c1b703a60c525ce9377f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_ontocord_wide_7b-stacked-stage1/8098c6f4-c2a4-44d9-92b5-72dfccd83395.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_ontocord_wide_7b-stacked-stage1/1762652580.41932", - "retrieved_timestamp": "1762652580.419321", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/ontocord_wide_7b-stacked-stage1", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/ontocord_wide_7b-stacked-stage1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14845388014911545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28965200351622594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3603541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11053856382978723 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.888 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_starcoder2-29b-ls/68285cd4-9573-4fa7-af6f-321c7b4c8171.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_starcoder2-29b-ls/68285cd4-9573-4fa7-af6f-321c7b4c8171.json deleted file mode 100644 index 9a2a210da332e38cb4eb757139a7925349a4aea4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_starcoder2-29b-ls/68285cd4-9573-4fa7-af6f-321c7b4c8171.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/ontocord_starcoder2-29b-ls/1762652580.419764", - "retrieved_timestamp": "1762652580.419765", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/starcoder2-29b-ls", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/starcoder2-29b-ls" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21492417895628046 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37349755200329665 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0188821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36999999999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1869182180851064 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Starcoder2ForCausalLM", - "params_billions": 29.009 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_starcoder2_3b-AutoRedteam/9ae53763-119d-40af-bdf2-97dd34eaf9e3.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_starcoder2_3b-AutoRedteam/9ae53763-119d-40af-bdf2-97dd34eaf9e3.json deleted file mode 100644 index 5ad2b43b64641e1a0a6f102551cc12c079a6d641..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_starcoder2_3b-AutoRedteam/9ae53763-119d-40af-bdf2-97dd34eaf9e3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_starcoder2_3b-AutoRedteam/1762652580.419971", - "retrieved_timestamp": "1762652580.4199722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/starcoder2_3b-AutoRedteam", - "developer": "ontocord", - "inference_platform": "unknown", - 
"id": "ontocord/starcoder2_3b-AutoRedteam" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15737133029251277 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3497644619743598 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3645729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13364361702127658 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Starcoder2ForCausalLM", - "params_billions": 3.181 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b-merge_test/db2c4148-d7be-4f13-a449-095b78bda7c2.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b-merge_test/db2c4148-d7be-4f13-a449-095b78bda7c2.json deleted file mode 100644 index b347bb89321e9839238d8a47424e44fec3b492d7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b-merge_test/db2c4148-d7be-4f13-a449-095b78bda7c2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b-merge_test/1762652580.420181", - "retrieved_timestamp": "1762652580.420182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b-merge_test", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b-merge_test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17628115622104903 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011467446788138 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.342 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10663231382978723 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained/91ac4c22-3f2a-48fd-aad8-5c26a5f07ea6.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained/91ac4c22-3f2a-48fd-aad8-5c26a5f07ea6.json deleted file mode 100644 index 9640db683fe2d6e5c562b6b5d56833918f021c49..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained/91ac4c22-3f2a-48fd-aad8-5c26a5f07ea6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained/1762652580.420386", - "retrieved_timestamp": "1762652580.420386", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13946107439371977 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30036095049490824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36320833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11402925531914894 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/c5a9d4e0-a43b-4249-abbb-f544bdb2d806.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/c5a9d4e0-a43b-4249-abbb-f544bdb2d806.json deleted file mode 100644 index 451e227e17dd9f111d2ccea4846dee48f8b3cc2a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/c5a9d4e0-a43b-4249-abbb-f544bdb2d806.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge/1762652580.420605", - "retrieved_timestamp": "1762652580.420605", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16636413604790845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30309127879396963 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3845416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/5b9a91bc-bdca-468e-b8eb-b0e97fd97148.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/5b9a91bc-bdca-468e-b8eb-b0e97fd97148.json deleted file mode 100644 index e7f67f0f4934c2b5af73cc717588adccd07eea76..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/5b9a91bc-bdca-468e-b8eb-b0e97fd97148.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge/1762652580.420933", - "retrieved_timestamp": "1762652580.420937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16973629968483622 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2975125970659158 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37781249999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.1124501329787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/aeda694a-795c-4a42-8b40-d406b7223627.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/aeda694a-795c-4a42-8b40-d406b7223627.json deleted file mode 100644 index d646a2a44b85f1e2bd72e5af40d0667cfe08dfdc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/aeda694a-795c-4a42-8b40-d406b7223627.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue/1762652580.4213939", - "retrieved_timestamp": "1762652580.4213948", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14800396281865452 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30953444521357315 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3579375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1107878989361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/3e26804b-13fa-4115-a000-d6be3339e7b1.json 
b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/3e26804b-13fa-4115-a000-d6be3339e7b1.json deleted file mode 100644 index d9fa0a933ff8eb488c23aa6e7fbf5e2f069c5569..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/3e26804b-13fa-4115-a000-d6be3339e7b1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue/1762652580.4216871", - "retrieved_timestamp": "1762652580.421689", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12367407368005781 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3060091508023586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3672708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/d1f24979-eced-4dca-a5a1-4e4bfee28779.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/d1f24979-eced-4dca-a5a1-4e4bfee28779.json deleted file mode 100644 index 8921c9757726df55bdf18bf845b953b2d1a91b60..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/d1f24979-eced-4dca-a5a1-4e4bfee28779.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue/1762652580.42205", - "retrieved_timestamp": "1762652580.422051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1191527369601546 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2955590587949957 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35530208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11826795212765957 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/171ae287-000a-491e-9ecb-ac7d29217e9e.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/171ae287-000a-491e-9ecb-ac7d29217e9e.json deleted file mode 100644 index 47e092745871ced9975487aa4e319a9fae700768..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/171ae287-000a-491e-9ecb-ac7d29217e9e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/1762652580.42265", - 
"retrieved_timestamp": "1762652580.4226508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1161551350416894 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3184343946486203 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34469791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11236702127659574 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/b7a0e530-08f8-4c6a-9258-733b59096812.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/b7a0e530-08f8-4c6a-9258-733b59096812.json deleted file mode 100644 index f966bffb8225a080efc9b77d0786fd45bb082446..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/b7a0e530-08f8-4c6a-9258-733b59096812.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue/1762652580.422383", - "retrieved_timestamp": "1762652580.422384", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1128328390891723 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3171441625189962 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26845637583892623 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11294880319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/f14d0513-676d-45e3-97c4-bf386f61b856.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/f14d0513-676d-45e3-97c4-bf386f61b856.json deleted file mode 100644 index 15779d14a72069f92347b552e33c10adda100b89..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/f14d0513-676d-45e3-97c4-bf386f61b856.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue/1762652580.422879", - "retrieved_timestamp": "1762652580.42288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": 
"ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13169279733329786 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30640062669813056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34460416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11444481382978723 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/4d673b5a-3237-433f-9e08-f614fe10edc4.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/4d673b5a-3237-433f-9e08-f614fe10edc4.json deleted file mode 100644 index af0e3e5f53db11da6ca5b3f427c435ac3c348a08..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/4d673b5a-3237-433f-9e08-f614fe10edc4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue/1762652580.4231439", - "retrieved_timestamp": "1762652580.423145", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.118178654857999 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3037498354512724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35669791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11619015957446809 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/e19c2b24-4deb-45b4-a0a9-2d055bc90446.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/e19c2b24-4deb-45b4-a0a9-2d055bc90446.json deleted file mode 100644 index 9cf24a0d62bf761984c70c3f16434e84fed6bf5f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/e19c2b24-4deb-45b4-a0a9-2d055bc90446.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue/1762652580.423407", - "retrieved_timestamp": "1762652580.423407", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12399876771410967 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30324371251012056 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34869791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11278257978723404 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_math.no_issue/449f6b1a-5264-4c7b-82d6-60e61841b7d6.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_math.no_issue/449f6b1a-5264-4c7b-82d6-60e61841b7d6.json deleted file mode 100644 index e4842a9e7666c84c16ecfc4e82c68693085d958e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_math.no_issue/449f6b1a-5264-4c7b-82d6-60e61841b7d6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_math.no_issue/1762652580.423659", - "retrieved_timestamp": "1762652580.42366", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12981888057022034 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30518984588252307 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39276041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/2e22170f-839d-482d-bc8a-ed345aa900af.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/2e22170f-839d-482d-bc8a-ed345aa900af.json deleted file mode 100644 index 7b47d45ea5526fea93de63082b8390847e445b66..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/2e22170f-839d-482d-bc8a-ed345aa900af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue/1762652580.4239051", - "retrieved_timestamp": "1762652580.4239051", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20490742341431845 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911778102988436 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11668882978723404 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/75f9224b-df09-4693-8b04-c00e17785250.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/75f9224b-df09-4693-8b04-c00e17785250.json deleted file mode 100644 index 982ae3a2f42c2994994aa6fc86ff88a03ca16c04..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/75f9224b-df09-4693-8b04-c00e17785250.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical/1762652580.42415", - "retrieved_timestamp": "1762652580.424151", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.146105666298754 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29981162881428614 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39257291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1141123670212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_formatted_text/4bd52ced-e009-4805-8d0a-ce37b25f103c.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_formatted_text/4bd52ced-e009-4805-8d0a-ce37b25f103c.json deleted file mode 100644 index fadef04b906a928390df72de9583c777d59ee012..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_formatted_text/4bd52ced-e009-4805-8d0a-ce37b25f103c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_formatted_text/1762652580.424435", - "retrieved_timestamp": "1762652580.424437", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14872870649875664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3068950688059236 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34739583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11461103723404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_how-to/14e2e5a7-d43c-4a02-9af6-6c378778d7fc.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_how-to/14e2e5a7-d43c-4a02-9af6-6c378778d7fc.json deleted file mode 100644 index 920dde00e1a93e5a9e4c301bfaa26e0dac3fc1b9..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_how-to/14e2e5a7-d43c-4a02-9af6-6c378778d7fc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_how-to/1762652580.424736", - "retrieved_timestamp": "1762652580.424736", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12454842041339201 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3047398483929371 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36581250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11527593085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_math/d2d7e55e-87a3-4390-a1e4-47a2d0c62bd2.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_math/d2d7e55e-87a3-4390-a1e4-47a2d0c62bd2.json deleted file mode 100644 index eb8784fd7185863ee83aca4cb0d1fcd9762b12f1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_math/d2d7e55e-87a3-4390-a1e4-47a2d0c62bd2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_math/1762652580.42496", - "retrieved_timestamp": "1762652580.424961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_math", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_math" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19151850423542865 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3059577262726771 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37003125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10920877659574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_news/a13cf03f-cf1a-49a8-ba6c-d6e3b27036fa.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_news/a13cf03f-cf1a-49a8-ba6c-d6e3b27036fa.json deleted file mode 100644 index 20a1a81468fe7a8826067d30f6cf0e75655c6ce1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_news/a13cf03f-cf1a-49a8-ba6c-d6e3b27036fa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_news/1762652580.425178", - "retrieved_timestamp": "1762652580.4251788", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_news", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_news" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16581448334862608 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2925879483112595 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01661631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36209375000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_software/dab94fc0-5bea-4875-a802-8ef793bc7fc7.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_software/dab94fc0-5bea-4875-a802-8ef793bc7fc7.json deleted file mode 100644 index 97a90f9a50db06d9d69863bfcd1d60b863726d63..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_3b_sft_stage1.2-ss1-expert_software/dab94fc0-5bea-4875-a802-8ef793bc7fc7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_3b_sft_stage1.2-ss1-expert_software/1762652580.425399", - "retrieved_timestamp": "1762652580.4254", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_3b_sft_stage1.2-ss1-expert_software", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_software" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1733832896714052 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2979956844198214 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35685416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11402925531914894 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.759 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/e16d5502-1721-424f-a149-9a6233a2183a.json b/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/e16d5502-1721-424f-a149-9a6233a2183a.json deleted file mode 100644 index 52786333c3220b1100b1e28114fa21b1e2ccece6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ontocord/ontocord_wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/e16d5502-1721-424f-a149-9a6233a2183a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ontocord_wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked/1762652580.425614", - "retrieved_timestamp": "1762652580.425615", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked", - "developer": "ontocord", - "inference_platform": "unknown", - "id": "ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12439881736015992 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30264484636677236 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11145279255319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.888 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/oobabooga/oobabooga_CodeBooga-34B-v0.1/3086045f-e22d-4aca-9459-fc64454a2fb2.json b/leaderboard_data/HFOpenLLMv2/oobabooga/oobabooga_CodeBooga-34B-v0.1/3086045f-e22d-4aca-9459-fc64454a2fb2.json deleted file mode 100644 index 481a5d6ea6002f53fece2477e203a5d9acdbd648..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/oobabooga/oobabooga_CodeBooga-34B-v0.1/3086045f-e22d-4aca-9459-fc64454a2fb2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/oobabooga_CodeBooga-34B-v0.1/1762652580.425838", - "retrieved_timestamp": "1762652580.425838", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "oobabooga/CodeBooga-34B-v0.1", - "developer": "oobabooga", - "inference_platform": "unknown", - "id": "oobabooga/CodeBooga-34B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5250180631834643 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3427441185661722 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43102083333333335 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23595412234042554 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 33.744 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/open-atlas/open-atlas_Atlas-Flash-1.5B-Preview/96ae17c1-69ef-46c6-bb15-c1b576ba8131.json b/leaderboard_data/HFOpenLLMv2/open-atlas/open-atlas_Atlas-Flash-1.5B-Preview/96ae17c1-69ef-46c6-bb15-c1b576ba8131.json deleted file mode 100644 index b1c619cca6539674c13792aced6b7bbd95a08b47..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/open-atlas/open-atlas_Atlas-Flash-1.5B-Preview/96ae17c1-69ef-46c6-bb15-c1b576ba8131.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/open-atlas_Atlas-Flash-1.5B-Preview/1762652580.4281778", - "retrieved_timestamp": "1762652580.4281778", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "open-atlas/Atlas-Flash-1.5B-Preview", - "developer": "open-atlas", - "inference_platform": "unknown", - "id": "open-atlas/Atlas-Flash-1.5B-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3269569187533522 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3215460102660847 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2212990936555891 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34879166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13738364361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/open-atlas/open-atlas_Atlas-Flash-7B-Preview/6fd7bb75-6648-4bfe-a232-f9efe4b7c45e.json 
b/leaderboard_data/HFOpenLLMv2/open-atlas/open-atlas_Atlas-Flash-7B-Preview/6fd7bb75-6648-4bfe-a232-f9efe4b7c45e.json deleted file mode 100644 index a4d6e6aeee0f8ab6db5ada344f815441c7235315..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/open-atlas/open-atlas_Atlas-Flash-7B-Preview/6fd7bb75-6648-4bfe-a232-f9efe4b7c45e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/open-atlas_Atlas-Flash-7B-Preview/1762652580.428412", - "retrieved_timestamp": "1762652580.428413", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "open-atlas/Atlas-Flash-7B-Preview", - "developer": "open-atlas", - "inference_platform": "unknown", - "id": "open-atlas/Atlas-Flash-7B-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3907543096761038 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3541994356643969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25755287009063443 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38358333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27842420212765956 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/open-neo/open-neo_Kyro-n1-3B/0a8b6c55-da69-4f4d-98cc-9d3f5b82d9e2.json b/leaderboard_data/HFOpenLLMv2/open-neo/open-neo_Kyro-n1-3B/0a8b6c55-da69-4f4d-98cc-9d3f5b82d9e2.json deleted file mode 100644 index a029bdd833e737627a32e05df6affb40748346fd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/open-neo/open-neo_Kyro-n1-3B/0a8b6c55-da69-4f4d-98cc-9d3f5b82d9e2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/open-neo_Kyro-n1-3B/1762652580.428618", - "retrieved_timestamp": "1762652580.428618", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "open-neo/Kyro-n1-3B", - "developer": "open-neo", - "inference_platform": "unknown", - "id": "open-neo/Kyro-n1-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45949746672163194 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46853756471175373 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40879166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34225398936170215 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/open-neo/open-neo_Kyro-n1-7B/f69621cf-6e46-4805-b8f2-d7a7cba3a0e4.json b/leaderboard_data/HFOpenLLMv2/open-neo/open-neo_Kyro-n1-7B/f69621cf-6e46-4805-b8f2-d7a7cba3a0e4.json deleted file mode 100644 index 12764a765ed72f03c16fbb508a337fd47672eeec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/open-neo/open-neo_Kyro-n1-7B/f69621cf-6e46-4805-b8f2-d7a7cba3a0e4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/open-neo_Kyro-n1-7B/1762652580.42885", - "retrieved_timestamp": "1762652580.42885", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "open-neo/Kyro-n1-7B", - "developer": "open-neo", - "inference_platform": "unknown", - "id": "open-neo/Kyro-n1-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5572669406064796 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5386561160683788 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38972809667673713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38841666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433344414893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/open-thoughts/open-thoughts_OpenThinker-7B/feb0d715-d1bc-4b0e-8585-a0646c07244b.json b/leaderboard_data/HFOpenLLMv2/open-thoughts/open-thoughts_OpenThinker-7B/feb0d715-d1bc-4b0e-8585-a0646c07244b.json deleted file mode 100644 index 2bdfc9c480d1f87887534098c705ac49d17bfdb8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/open-thoughts/open-thoughts_OpenThinker-7B/feb0d715-d1bc-4b0e-8585-a0646c07244b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/open-thoughts_OpenThinker-7B/1762652580.4290519", - "retrieved_timestamp": "1762652580.4290528", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "open-thoughts/OpenThinker-7B", - "developer": "open-thoughts", - "inference_platform": "unknown", - "id": "open-thoughts/OpenThinker-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4088895242401273 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5342727589615611 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.4259818731117825 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38199999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41647273936170215 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/AI-Sweden-Models_gpt-sw3-40b/e791a3d6-928e-43c9-96ee-156901e8b18b.json b/leaderboard_data/HFOpenLLMv2/openai/AI-Sweden-Models_gpt-sw3-40b/e791a3d6-928e-43c9-96ee-156901e8b18b.json deleted file mode 100644 index 2e60ba5c5e358b44b3e98a2269491dfd1e616ac7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/AI-Sweden-Models_gpt-sw3-40b/e791a3d6-928e-43c9-96ee-156901e8b18b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/AI-Sweden-Models_gpt-sw3-40b/1762652579.475041", - "retrieved_timestamp": "1762652579.475042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "AI-Sweden-Models/gpt-sw3-40b", - "developer": "openai", - "inference_platform": "unknown", - "id": "AI-Sweden-Models/gpt-sw3-40b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1470298807164989 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3267744702957652 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36323958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { 
- "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12757646276595744 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPT2LMHeadModel", - "params_billions": 39.927 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/DeepAutoAI_causal_gpt2/bf683545-a6df-4deb-9a91-ea6b8eae8be7.json b/leaderboard_data/HFOpenLLMv2/openai/DeepAutoAI_causal_gpt2/bf683545-a6df-4deb-9a91-ea6b8eae8be7.json deleted file mode 100644 index bfb3a6af7b50866009c17d0e67d7dbf1e034abf9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/DeepAutoAI_causal_gpt2/bf683545-a6df-4deb-9a91-ea6b8eae8be7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_causal_gpt2/1762652579.548641", - "retrieved_timestamp": "1762652579.5486422", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepAutoAI/causal_gpt2", - "developer": "openai", - "inference_platform": "unknown", - "id": "DeepAutoAI/causal_gpt2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1812767900282362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30257073962835446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42695833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11311502659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.124 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/DeepAutoAI_d2nwg_causal_gpt2/6b5b21c7-9284-4117-a63c-65628604e1a5.json b/leaderboard_data/HFOpenLLMv2/openai/DeepAutoAI_d2nwg_causal_gpt2/6b5b21c7-9284-4117-a63c-65628604e1a5.json deleted file mode 100644 index 
e49ad022f3e368e0751714d17e39ab2d2723d229..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/DeepAutoAI_d2nwg_causal_gpt2/6b5b21c7-9284-4117-a63c-65628604e1a5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_d2nwg_causal_gpt2/1762652579.549271", - "retrieved_timestamp": "1762652579.549272", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepAutoAI/d2nwg_causal_gpt2", - "developer": "openai", - "inference_platform": "unknown", - "id": "DeepAutoAI/d2nwg_causal_gpt2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19161823960425006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30268984588252307 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42971875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11510970744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.124 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/DeepAutoAI_d2nwg_causal_gpt2_v1/f822093a-2bdc-4284-8af2-8048d09afeb2.json b/leaderboard_data/HFOpenLLMv2/openai/DeepAutoAI_d2nwg_causal_gpt2_v1/f822093a-2bdc-4284-8af2-8048d09afeb2.json deleted file mode 100644 index 61b74cf1fad2a8c37c9f66a0983b3ba6eb09d3c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/DeepAutoAI_d2nwg_causal_gpt2_v1/f822093a-2bdc-4284-8af2-8048d09afeb2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/DeepAutoAI_d2nwg_causal_gpt2_v1/1762652579.549553", - "retrieved_timestamp": "1762652579.5495539", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" 
- }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "DeepAutoAI/d2nwg_causal_gpt2_v1", - "developer": "openai", - "inference_platform": "unknown", - "id": "DeepAutoAI/d2nwg_causal_gpt2_v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1988623518929773 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29918984588252306 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4336875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11353058510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.124 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/EleutherAI_gpt-j-6b/1f140f2a-c9cb-49fb-8bcd-e59f699fd12a.json b/leaderboard_data/HFOpenLLMv2/openai/EleutherAI_gpt-j-6b/1f140f2a-c9cb-49fb-8bcd-e59f699fd12a.json deleted file mode 100644 index 47449da61ebf4f5f2bc7b09e017788924b2ee194..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/EleutherAI_gpt-j-6b/1f140f2a-c9cb-49fb-8bcd-e59f699fd12a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-j-6b/1762652579.5928068", - "retrieved_timestamp": "1762652579.592808", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EleutherAI/gpt-j-6b", - "developer": "openai", - "inference_platform": "unknown", - "id": "EleutherAI/gpt-j-6b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2522185578708937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3191044431037278 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12408577127659574 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPTJForCausalLM", - "params_billions": 6.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/EleutherAI_gpt-neo-1.3B/dc615b98-9255-4a6e-afe2-c79d59362520.json b/leaderboard_data/HFOpenLLMv2/openai/EleutherAI_gpt-neo-1.3B/dc615b98-9255-4a6e-afe2-c79d59362520.json deleted file mode 100644 index 1ca579f6d92156c75fde20597135c4ea622c9eac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/EleutherAI_gpt-neo-1.3B/dc615b98-9255-4a6e-afe2-c79d59362520.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-neo-1.3B/1762652579.59305", - "retrieved_timestamp": "1762652579.59305", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EleutherAI/gpt-neo-1.3B", - "developer": "openai", - "inference_platform": "unknown", - "id": "EleutherAI/gpt-neo-1.3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20790502533278366 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30392315869356407 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38165625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163563829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPTNeoForCausalLM", - "params_billions": 1.366 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/EleutherAI_gpt-neo-125m/cff09938-5918-4825-b974-194019b48165.json b/leaderboard_data/HFOpenLLMv2/openai/EleutherAI_gpt-neo-125m/cff09938-5918-4825-b974-194019b48165.json deleted file mode 100644 index 2a048d7382b194940f7feb8dd8d3ea2ad064869e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/EleutherAI_gpt-neo-125m/cff09938-5918-4825-b974-194019b48165.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-neo-125m/1762652579.593268", - "retrieved_timestamp": "1762652579.593268", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EleutherAI/gpt-neo-125m", - "developer": "openai", - "inference_platform": "unknown", - "id": "EleutherAI/gpt-neo-125m" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19054442213327305 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3115156885791523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3593333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10255984042553191 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"GPTNeoForCausalLM", - "params_billions": 0.15 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/EleutherAI_gpt-neo-2.7B/6ebf0016-f747-4ccd-82fa-db427733b2f9.json b/leaderboard_data/HFOpenLLMv2/openai/EleutherAI_gpt-neo-2.7B/6ebf0016-f747-4ccd-82fa-db427733b2f9.json deleted file mode 100644 index c63d3034164a9b5931d7e2f832b3c67e1d6755b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/EleutherAI_gpt-neo-2.7B/6ebf0016-f747-4ccd-82fa-db427733b2f9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/EleutherAI_gpt-neo-2.7B/1762652579.5934908", - "retrieved_timestamp": "1762652579.5934908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EleutherAI/gpt-neo-2.7B", - "developer": "openai", - "inference_platform": "unknown", - "id": "EleutherAI/gpt-neo-2.7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2589628851447493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3139516033315253 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3553645833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11627327127659574 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPTNeoForCausalLM", - "params_billions": 2.718 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/EleutherAI_gpt-neox-20b/0da6366b-b997-411e-ac76-c25b061e13f8.json b/leaderboard_data/HFOpenLLMv2/openai/EleutherAI_gpt-neox-20b/0da6366b-b997-411e-ac76-c25b061e13f8.json deleted file mode 100644 index fbde21f171d4fe86d2ee7e74947412b6b3065fc7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/EleutherAI_gpt-neox-20b/0da6366b-b997-411e-ac76-c25b061e13f8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/EleutherAI_gpt-neox-20b/1762652579.5937028", - "retrieved_timestamp": "1762652579.593704", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "EleutherAI/gpt-neox-20b", - "developer": "openai", - "inference_platform": "unknown", - "id": "EleutherAI/gpt-neox-20b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2586880587951081 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31650380320877564 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36466666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1155252659574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 20.739 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/Kimargin_GPT-NEO-1.3B-wiki/9084d476-dee7-4447-9955-e0f066bd35ba.json b/leaderboard_data/HFOpenLLMv2/openai/Kimargin_GPT-NEO-1.3B-wiki/9084d476-dee7-4447-9955-e0f066bd35ba.json deleted file mode 100644 index 9b826c1c2a1ef73cada5cd2100d48f30832aa5c0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/Kimargin_GPT-NEO-1.3B-wiki/9084d476-dee7-4447-9955-e0f066bd35ba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Kimargin_GPT-NEO-1.3B-wiki/1762652579.6992168", - "retrieved_timestamp": "1762652579.699218", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Kimargin/GPT-NEO-1.3B-wiki", - "developer": "openai", - "inference_platform": "unknown", - "id": "Kimargin/GPT-NEO-1.3B-wiki" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19206815693471102 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3026339952046975 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10987367021276596 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoForCausalLM", - "params_billions": 1.316 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/NYTK_PULI-GPTrio/685fc779-4f8b-4110-82da-5a49697153a0.json b/leaderboard_data/HFOpenLLMv2/openai/NYTK_PULI-GPTrio/685fc779-4f8b-4110-82da-5a49697153a0.json deleted file mode 100644 index dfeb6e3adfeded7001050b8ebfafd8998172a509..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/NYTK_PULI-GPTrio/685fc779-4f8b-4110-82da-5a49697153a0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/NYTK_PULI-GPTrio/1762652579.769266", - "retrieved_timestamp": "1762652579.769266", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "NYTK/PULI-GPTrio", - "developer": "openai", - "inference_platform": "unknown", - "id": "NYTK/PULI-GPTrio" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21797164855915638 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30600290906237543 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38187499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11369680851063829 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 7.673 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/Sharathhebbar24_chat_gpt2_dpo/ce90bca7-f999-44ef-9b72-1fdb4ac68eb0.json b/leaderboard_data/HFOpenLLMv2/openai/Sharathhebbar24_chat_gpt2_dpo/ce90bca7-f999-44ef-9b72-1fdb4ac68eb0.json deleted file mode 100644 index ea3a1a2eedfd6c061ef2021ffb32931e6e5c26da..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/Sharathhebbar24_chat_gpt2_dpo/ce90bca7-f999-44ef-9b72-1fdb4ac68eb0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/Sharathhebbar24_chat_gpt2_dpo/1762652579.8799832", - "retrieved_timestamp": "1762652579.8799841", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "Sharathhebbar24/chat_gpt2_dpo", - "developer": "openai", - "inference_platform": "unknown", - "id": "Sharathhebbar24/chat_gpt2_dpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09861944086135896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29022988561565644 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.38184375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11419547872340426 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.124 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/distilbert_distilgpt2/a21cd9f0-6006-4587-bcd1-f1d42dfce7ba.json b/leaderboard_data/HFOpenLLMv2/openai/distilbert_distilgpt2/a21cd9f0-6006-4587-bcd1-f1d42dfce7ba.json deleted file mode 100644 index f1c026e2030d01f2e235df0471c48deb4e4076f2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/distilbert_distilgpt2/a21cd9f0-6006-4587-bcd1-f1d42dfce7ba.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/distilbert_distilgpt2/1762652580.1266282", - "retrieved_timestamp": "1762652580.126629", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "distilbert/distilgpt2", - "developer": "openai", - "inference_platform": "unknown", - "id": "distilbert/distilgpt2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06110010328151527 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3037988148650536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42072916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11868351063829788 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.088 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/langgptai_Qwen-las-v0.1/cfaa9b4e-8588-45a5-9b9d-4268a71b128b.json 
b/leaderboard_data/HFOpenLLMv2/openai/langgptai_Qwen-las-v0.1/cfaa9b4e-8588-45a5-9b9d-4268a71b128b.json deleted file mode 100644 index 47b7c2195bf7b1f12d0a3d441b5ed9a7405efe3b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/langgptai_Qwen-las-v0.1/cfaa9b4e-8588-45a5-9b9d-4268a71b128b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/langgptai_Qwen-las-v0.1/1762652580.313808", - "retrieved_timestamp": "1762652580.313809", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "langgptai/Qwen-las-v0.1", - "developer": "openai", - "inference_platform": "unknown", - "id": "langgptai/Qwen-las-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33010412372504955 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38925525629956187 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37009374999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2325465425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 7.901 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/meraGPT_mera-mix-4x7B/152e8d2f-8470-45b2-8318-9b6c44438978.json b/leaderboard_data/HFOpenLLMv2/openai/meraGPT_mera-mix-4x7B/152e8d2f-8470-45b2-8318-9b6c44438978.json deleted file mode 100644 index 4e3afd2bf549df56613a87f2bdfa313bf7bbfb50..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/meraGPT_mera-mix-4x7B/152e8d2f-8470-45b2-8318-9b6c44438978.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/meraGPT_mera-mix-4x7B/1762652580.345789", - "retrieved_timestamp": "1762652580.34579", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meraGPT/mera-mix-4x7B", - "developer": "openai", - "inference_platform": "unknown", - "id": "meraGPT/mera-mix-4x7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4831779677921249 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40189899163661713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40565625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27476728723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 24.154 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/microsoft_DialoGPT-medium/3c70b5d5-784d-41fb-8ca7-eabd6a96a195.json b/leaderboard_data/HFOpenLLMv2/openai/microsoft_DialoGPT-medium/3c70b5d5-784d-41fb-8ca7-eabd6a96a195.json deleted file mode 100644 index e4b7e0c8de322779acbcb9c7e4a06ba589daf99f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/microsoft_DialoGPT-medium/3c70b5d5-784d-41fb-8ca7-eabd6a96a195.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/microsoft_DialoGPT-medium/1762652580.353813", - "retrieved_timestamp": "1762652580.3538141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "microsoft/DialoGPT-medium", - "developer": "openai", - "inference_platform": "unknown", - "id": "microsoft/DialoGPT-medium" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14790422744983311 - 
} - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3014156380141994 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1118683510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.345 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/openai-community_gpt2-large/15499118-2a47-4a6f-8c86-158a87a9350f.json b/leaderboard_data/HFOpenLLMv2/openai/openai-community_gpt2-large/15499118-2a47-4a6f-8c86-158a87a9350f.json deleted file mode 100644 index ade51d12d17c547b622f835760a4b2f14f12420f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/openai-community_gpt2-large/15499118-2a47-4a6f-8c86-158a87a9350f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/openai-community_gpt2-large/1762652580.4297202", - "retrieved_timestamp": "1762652580.429721", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "openai-community/gpt2-large", - "developer": "openai", - "inference_platform": "unknown", - "id": "openai-community/gpt2-large" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20478220011790937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30688418760118824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3788645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11419547872340426 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.812 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/openai-community_gpt2-medium/f68c55dc-0d74-4c75-ac57-62f23cce01b5.json b/leaderboard_data/HFOpenLLMv2/openai/openai-community_gpt2-medium/f68c55dc-0d74-4c75-ac57-62f23cce01b5.json deleted file mode 100644 index 1468d12d91be7bcde5d5b1f731ce91971bec6552..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/openai-community_gpt2-medium/f68c55dc-0d74-4c75-ac57-62f23cce01b5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/openai-community_gpt2-medium/1762652580.4299362", - "retrieved_timestamp": "1762652580.429937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "openai-community/gpt2-medium", - "developer": "openai", - "inference_platform": "unknown", - "id": "openai-community/gpt2-medium" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22084402718121252 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3050280232176266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.11818484042553191 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.38 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/openai-community_gpt2-xl/39a68088-0a01-482d-81b3-c6a84d98d0ca.json b/leaderboard_data/HFOpenLLMv2/openai/openai-community_gpt2-xl/39a68088-0a01-482d-81b3-c6a84d98d0ca.json deleted file mode 100644 index c4c9ff0a99f29b1028f93e4cd7e5f842dc05ac51..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/openai-community_gpt2-xl/39a68088-0a01-482d-81b3-c6a84d98d0ca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/openai-community_gpt2-xl/1762652580.430138", - "retrieved_timestamp": "1762652580.430138", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "openai-community/gpt2-xl", - "developer": "openai", - "inference_platform": "unknown", - "id": "openai-community/gpt2-xl" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20385798570016445 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30085761123260785 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37095833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11311502659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 1.608 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/openai-community_gpt2/435a8268-cf26-4c78-8789-758dd32759b1.json b/leaderboard_data/HFOpenLLMv2/openai/openai-community_gpt2/435a8268-cf26-4c78-8789-758dd32759b1.json deleted file mode 100644 index 5cd981ef55a1b0469f8a385901b4c2397c8dc841..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/openai/openai-community_gpt2/435a8268-cf26-4c78-8789-758dd32759b1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/openai-community_gpt2/1762652580.429537", - "retrieved_timestamp": "1762652580.429537", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "openai-community/gpt2", - "developer": "openai", - "inference_platform": "unknown", - "id": "openai-community/gpt2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17795449407571912 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30165801067653053 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.005287009063444109 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43902083333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11652260638297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.137 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/openai-community_gpt2/a18409fa-1372-401e-8ae5-f25eaa6386d2.json b/leaderboard_data/HFOpenLLMv2/openai/openai-community_gpt2/a18409fa-1372-401e-8ae5-f25eaa6386d2.json deleted file mode 100644 index b91d1349f11d0d6113a399d1afe488c6394da03e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/openai-community_gpt2/a18409fa-1372-401e-8ae5-f25eaa6386d2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/openai-community_gpt2/1762652580.42929", - "retrieved_timestamp": "1762652580.429291", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"openai-community/gpt2", - "developer": "openai", - "inference_platform": "unknown", - "id": "openai-community/gpt2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17925327021192655 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3035711244213359 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44705208333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11594082446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.137 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/openai_gpt2/43c1b559-e9e8-477e-95d9-1c28ac5d265c.json b/leaderboard_data/HFOpenLLMv2/openai/openai_gpt2/43c1b559-e9e8-477e-95d9-1c28ac5d265c.json deleted file mode 100644 index 583e7ce09985918d177865cdcb489eb8ef052e9e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/openai_gpt2/43c1b559-e9e8-477e-95d9-1c28ac5d265c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gpt2/1762652580.1809301", - "retrieved_timestamp": "1762652580.180931", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gpt2", - "developer": "openai", - "inference_platform": "unknown", - "id": "openai/gpt2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1934168007553292 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036385401516729 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { 
- "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0030211480362537764 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43241666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1149434840425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.137 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/openai_gpt2/e28a8f11-68f6-464f-b1b8-21938cb41aa3.json b/leaderboard_data/HFOpenLLMv2/openai/openai_gpt2/e28a8f11-68f6-464f-b1b8-21938cb41aa3.json deleted file mode 100644 index 36ae93460aa1979917fe41b8270c42a26eeca6d0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/openai_gpt2/e28a8f11-68f6-464f-b1b8-21938cb41aa3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/gpt2/1762652580.181142", - "retrieved_timestamp": "1762652580.181143", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gpt2", - "developer": "openai", - "inference_platform": "unknown", - "id": "openai/gpt2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08333333333333333 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30833333333333335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23333333333333334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4333333333333333 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.137 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/postbot_gpt2-medium-emailgen/a661e335-7ed5-43b9-aa3b-1e027cebdb75.json b/leaderboard_data/HFOpenLLMv2/openai/postbot_gpt2-medium-emailgen/a661e335-7ed5-43b9-aa3b-1e027cebdb75.json deleted file mode 100644 index 33fc078b9bb7810b1c8028ea52842ed4f5f6264f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/postbot_gpt2-medium-emailgen/a661e335-7ed5-43b9-aa3b-1e027cebdb75.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/postbot_gpt2-medium-emailgen/1762652580.4421701", - "retrieved_timestamp": "1762652580.4421709", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "postbot/gpt2-medium-emailgen", - "developer": "openai", - "inference_platform": "unknown", - "id": "postbot/gpt2-medium-emailgen" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1492030035860406 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31304286003933807 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3911145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1146941489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.38 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/sumink_ftgpt/ba4e0ed2-201a-4007-afbe-65e8276d853c.json b/leaderboard_data/HFOpenLLMv2/openai/sumink_ftgpt/ba4e0ed2-201a-4007-afbe-65e8276d853c.json deleted file mode 100644 index 
7bd084c3a3d981387e265c59d464ec6856eea4a4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/sumink_ftgpt/ba4e0ed2-201a-4007-afbe-65e8276d853c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_ftgpt/1762652580.5475452", - "retrieved_timestamp": "1762652580.5475461", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/ftgpt", - "developer": "openai", - "inference_platform": "unknown", - "id": "sumink/ftgpt" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0787100449030794 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29190853217047663 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41384375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1171875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "GPT2LMHeadModel", - "params_billions": 0.124 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/togethercomputer_GPT-JT-6B-v1/03196258-8cc8-4c57-badf-9085ede8d658.json b/leaderboard_data/HFOpenLLMv2/openai/togethercomputer_GPT-JT-6B-v1/03196258-8cc8-4c57-badf-9085ede8d658.json deleted file mode 100644 index 6e5846045c38b7e6818fdff2748f9d4b53d559ef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/togethercomputer_GPT-JT-6B-v1/03196258-8cc8-4c57-badf-9085ede8d658.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/togethercomputer_GPT-JT-6B-v1/1762652580.574097", - "retrieved_timestamp": "1762652580.5740979", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "togethercomputer/GPT-JT-6B-v1", - "developer": "openai", - "inference_platform": "unknown", - "id": "togethercomputer/GPT-JT-6B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20610646418170453 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33026609127426704 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37365625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16256648936170212 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTJForCausalLM", - "params_billions": 6.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/universalml_NepaliGPT-2.0/07a71559-e618-4ba7-8721-bc6834f1c727.json b/leaderboard_data/HFOpenLLMv2/openai/universalml_NepaliGPT-2.0/07a71559-e618-4ba7-8721-bc6834f1c727.json deleted file mode 100644 index acb735e5e39abb871de24cc7b0bd48378ba2673e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/universalml_NepaliGPT-2.0/07a71559-e618-4ba7-8721-bc6834f1c727.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/universalml_NepaliGPT-2.0/1762652580.578092", - "retrieved_timestamp": "1762652580.578093", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "universalml/NepaliGPT-2.0", - "developer": "openai", - "inference_platform": "unknown", - "id": "universalml/NepaliGPT-2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03649538779327739 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46604761322722105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4656770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3299534574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/yuchenxie_ArlowGPT-3B-Multilingual/fd270937-c889-4a2b-aada-341a44c80d46.json b/leaderboard_data/HFOpenLLMv2/openai/yuchenxie_ArlowGPT-3B-Multilingual/fd270937-c889-4a2b-aada-341a44c80d46.json deleted file mode 100644 index 462b6ac50873e3eaf520aa48c5847c62a6bbec2e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/yuchenxie_ArlowGPT-3B-Multilingual/fd270937-c889-4a2b-aada-341a44c80d46.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yuchenxie_ArlowGPT-3B-Multilingual/1762652580.611115", - "retrieved_timestamp": "1762652580.611116", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yuchenxie/ArlowGPT-3B-Multilingual", - "developer": "openai", - "inference_platform": "unknown", - "id": "yuchenxie/ArlowGPT-3B-Multilingual" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6395486198841297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4301403132173714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37266666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2816655585106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openai/yuchenxie_ArlowGPT-8B/af890cb6-9d90-41b0-a7a1-c87f3584b93c.json b/leaderboard_data/HFOpenLLMv2/openai/yuchenxie_ArlowGPT-8B/af890cb6-9d90-41b0-a7a1-c87f3584b93c.json deleted file mode 100644 index 103fcf9bee1a2f02929ba911f76636636ea600af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openai/yuchenxie_ArlowGPT-8B/af890cb6-9d90-41b0-a7a1-c87f3584b93c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yuchenxie_ArlowGPT-8B/1762652580.611377", - "retrieved_timestamp": "1762652580.611378", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yuchenxie/ArlowGPT-8B", - "developer": "openai", - "inference_platform": "unknown", - "id": "yuchenxie/ArlowGPT-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7846536079823756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5080162816130412 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.378656914893617 - } - } - ], - "additional_details": { - "precision": "float16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openbmb/openbmb_MiniCPM-S-1B-sft-llama-format/53b78e02-9491-4f3b-a03b-7c015dde640a.json b/leaderboard_data/HFOpenLLMv2/openbmb/openbmb_MiniCPM-S-1B-sft-llama-format/53b78e02-9491-4f3b-a03b-7c015dde640a.json deleted file mode 100644 index d3973e02e7e4797056d3c825595311286e1a7421..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openbmb/openbmb_MiniCPM-S-1B-sft-llama-format/53b78e02-9491-4f3b-a03b-7c015dde640a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/openbmb_MiniCPM-S-1B-sft-llama-format/1762652580.430347", - "retrieved_timestamp": "1762652580.430348", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "openbmb/MiniCPM-S-1B-sft-llama-format", - "developer": "openbmb", - "inference_platform": "unknown", - "id": "openbmb/MiniCPM-S-1B-sft-llama-format" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3328767669782843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30493136322070497 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33167708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1858377659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat-3.5-0106/51cd5c94-7c87-4758-aadc-46acf20ab4b0.json b/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat-3.5-0106/51cd5c94-7c87-4758-aadc-46acf20ab4b0.json deleted file mode 100644 index e5855b3138b4a78f49b12fced188e750a75d01fd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat-3.5-0106/51cd5c94-7c87-4758-aadc-46acf20ab4b0.json +++ 
/dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/openchat_openchat-3.5-0106/1762652580.430586", - "retrieved_timestamp": "1762652580.4305868", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "openchat/openchat-3.5-0106", - "developer": "openchat", - "inference_platform": "unknown", - "id": "openchat/openchat-3.5-0106" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5966590867786362 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46169787083960595 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42543749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3291223404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat-3.5-1210/6b3c8f0b-25ed-4ae3-be89-a91815091de0.json b/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat-3.5-1210/6b3c8f0b-25ed-4ae3-be89-a91815091de0.json deleted file mode 100644 index f9d9e8c7f60c74aa2e714b9a794972144693775a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat-3.5-1210/6b3c8f0b-25ed-4ae3-be89-a91815091de0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/openchat_openchat-3.5-1210/1762652580.430838", - "retrieved_timestamp": "1762652580.430839", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "openchat/openchat-3.5-1210", - "developer": "openchat", - 
"inference_platform": "unknown", - "id": "openchat/openchat-3.5-1210" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.603678240402133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4535356846447984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4414375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3142453457446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat-3.6-8b-20240522/2305b9e7-1c2b-42d7-b306-802e32d53e0f.json b/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat-3.6-8b-20240522/2305b9e7-1c2b-42d7-b306-802e32d53e0f.json deleted file mode 100644 index f554bad3fb3754fadb1a202e9310417452f57679..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat-3.6-8b-20240522/2305b9e7-1c2b-42d7-b306-802e32d53e0f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/openchat_openchat-3.6-8b-20240522/1762652580.4310489", - "retrieved_timestamp": "1762652580.43105", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "openchat/openchat-3.6-8b-20240522", - "developer": "openchat", - "inference_platform": "unknown", - "id": "openchat/openchat-3.6-8b-20240522" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5343355629729118 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.5338412089001999 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3998541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32288896276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat_3.5/c2d66fd5-6c95-4b8e-b87f-c8f0ae00271a.json b/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat_3.5/c2d66fd5-6c95-4b8e-b87f-c8f0ae00271a.json deleted file mode 100644 index fce727c976509032129b6ba442dd4392f16701c8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat_3.5/c2d66fd5-6c95-4b8e-b87f-c8f0ae00271a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/openchat_openchat_3.5/1762652580.431262", - "retrieved_timestamp": "1762652580.431263", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "openchat/openchat_3.5", - "developer": "openchat", - "inference_platform": "unknown", - "id": "openchat/openchat_3.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5931118321608887 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44263196862832893 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4228645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31532579787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat_v3.2/2ee1a517-ef52-469e-ac5d-f14e3d72c87c.json b/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat_v3.2/2ee1a517-ef52-469e-ac5d-f14e3d72c87c.json deleted file mode 100644 index dfb64c94f2f9c4e5d91ebd6dc460fe40cf97d52c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat_v3.2/2ee1a517-ef52-469e-ac5d-f14e3d72c87c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/openchat_openchat_v3.2/1762652580.431712", - "retrieved_timestamp": "1762652580.431714", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "openchat/openchat_v3.2", - "developer": "openchat", - "inference_platform": "unknown", - "id": "openchat/openchat_v3.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2980558252104416 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4330564283474314 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.433625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2421875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat_v3.2_super/b7b3fcb7-bbc7-4f39-9daa-7a54362d5d68.json b/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat_v3.2_super/b7b3fcb7-bbc7-4f39-9daa-7a54362d5d68.json deleted file mode 100644 index 72db402a341c3abfca2a2c74651f9faab5a813c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/openchat/openchat_openchat_v3.2_super/b7b3fcb7-bbc7-4f39-9daa-7a54362d5d68.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/openchat_openchat_v3.2_super/1762652580.431961", - "retrieved_timestamp": "1762652580.431962", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "openchat/openchat_v3.2_super", - "developer": "openchat", - "inference_platform": "unknown", - "id": "openchat/openchat_v3.2_super" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2861906408329898 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42212089838803973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41613541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24251994680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/oxyapi/oxyapi_oxy-1-small/62126b06-5bd2-451f-a76c-7c227690f149.json b/leaderboard_data/HFOpenLLMv2/oxyapi/oxyapi_oxy-1-small/62126b06-5bd2-451f-a76c-7c227690f149.json deleted file mode 100644 index 3284a538c3ecd38b8ef441d7af42678072d5a966..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/oxyapi/oxyapi_oxy-1-small/62126b06-5bd2-451f-a76c-7c227690f149.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/oxyapi_oxy-1-small/1762652580.432582", - "retrieved_timestamp": "1762652580.432582", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "oxyapi/oxy-1-small", - "developer": "oxyapi", - "inference_platform": "unknown", - "id": "oxyapi/oxy-1-small" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6244608749229821 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5884593784818278 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36027190332326287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5000831117021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ozone-ai/ozone-ai_0x-lite/9b5b23bc-44bb-4d47-91a2-18e23571743d.json b/leaderboard_data/HFOpenLLMv2/ozone-ai/ozone-ai_0x-lite/9b5b23bc-44bb-4d47-91a2-18e23571743d.json deleted file mode 100644 index 69059d21bf176958876258086872df83084fb2da..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ozone-ai/ozone-ai_0x-lite/9b5b23bc-44bb-4d47-91a2-18e23571743d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ozone-ai_0x-lite/1762652580.432846", - "retrieved_timestamp": "1762652580.432847", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ozone-ai/0x-lite", - "developer": "ozone-ai", - "inference_platform": "unknown", - "id": "ozone-ai/0x-lite" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7739874643723099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6340580988016683 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31963087248322153 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4220625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5183676861702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ozone-research/ozone-research_Chirp-01/69a65ae3-71fe-4e33-be2d-20bc0c25969a.json b/leaderboard_data/HFOpenLLMv2/ozone-research/ozone-research_Chirp-01/69a65ae3-71fe-4e33-be2d-20bc0c25969a.json deleted file mode 100644 index 19fba924921a2b23fd13aa2216f609e6fbf5dec2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ozone-research/ozone-research_Chirp-01/69a65ae3-71fe-4e33-be2d-20bc0c25969a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ozone-research_Chirp-01/1762652580.433142", - "retrieved_timestamp": "1762652580.4331431", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ozone-research/Chirp-01", - "developer": "ozone-research", - "inference_platform": "unknown", - "id": "ozone-research/Chirp-01" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6347524568145853 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4649560260501419 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3466767371601209 - } 
- }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2718120805369128 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4487291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3508144946808511 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/paloalma/paloalma_ECE-TW3-JRGL-V1/d86238d3-3a4e-467a-8ce1-e6a4a903aa3b.json b/leaderboard_data/HFOpenLLMv2/paloalma/paloalma_ECE-TW3-JRGL-V1/d86238d3-3a4e-467a-8ce1-e6a4a903aa3b.json deleted file mode 100644 index a9ee98b1dbfadcb24c715e636cc184ab3c859e98..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/paloalma/paloalma_ECE-TW3-JRGL-V1/d86238d3-3a4e-467a-8ce1-e6a4a903aa3b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/paloalma_ECE-TW3-JRGL-V1/1762652580.433397", - "retrieved_timestamp": "1762652580.433398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "paloalma/ECE-TW3-JRGL-V1", - "developer": "paloalma", - "inference_platform": "unknown", - "id": "paloalma/ECE-TW3-JRGL-V1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5534947273235016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6283667540784627 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34731543624161076 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46208333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.422124335106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 68.977 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/paloalma/paloalma_ECE-TW3-JRGL-V2/d8d1a5b1-cc9a-4af9-b95f-db78f7edf70e.json b/leaderboard_data/HFOpenLLMv2/paloalma/paloalma_ECE-TW3-JRGL-V2/d8d1a5b1-cc9a-4af9-b95f-db78f7edf70e.json deleted file mode 100644 index 2b579963849d7e709b92f023f725f89e824e7f68..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/paloalma/paloalma_ECE-TW3-JRGL-V2/d8d1a5b1-cc9a-4af9-b95f-db78f7edf70e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/paloalma_ECE-TW3-JRGL-V2/1762652580.433646", - "retrieved_timestamp": "1762652580.4336472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "paloalma/ECE-TW3-JRGL-V2", - "developer": "paloalma", - "inference_platform": "unknown", - "id": "paloalma/ECE-TW3-JRGL-V2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2254894790267601 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6030988136029874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18504531722054382 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47932291666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4587765957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.288 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/paloalma/paloalma_ECE-TW3-JRGL-V5/9468fda5-a233-4d19-9a99-602e694f4a02.json b/leaderboard_data/HFOpenLLMv2/paloalma/paloalma_ECE-TW3-JRGL-V5/9468fda5-a233-4d19-9a99-602e694f4a02.json deleted file mode 100644 index 
f67c6514bed205fdb5797a2607f6611bb6bdab28..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/paloalma/paloalma_ECE-TW3-JRGL-V5/9468fda5-a233-4d19-9a99-602e694f4a02.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/paloalma_ECE-TW3-JRGL-V5/1762652580.433843", - "retrieved_timestamp": "1762652580.4338439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "paloalma/ECE-TW3-JRGL-V5", - "developer": "paloalma", - "inference_platform": "unknown", - "id": "paloalma/ECE-TW3-JRGL-V5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4552509563513699 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6024712037668832 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18353474320241692 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3414429530201342 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4620520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46476063829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 72.289 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/paloalma/paloalma_Le_Triomphant-ECE-TW3/49f92222-f6cd-47e5-968d-10dc4345dd90.json b/leaderboard_data/HFOpenLLMv2/paloalma/paloalma_Le_Triomphant-ECE-TW3/49f92222-f6cd-47e5-968d-10dc4345dd90.json deleted file mode 100644 index 661f3ca6aa5ef97c0c21df1045ec033f01b14706..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/paloalma/paloalma_Le_Triomphant-ECE-TW3/49f92222-f6cd-47e5-968d-10dc4345dd90.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/paloalma_Le_Triomphant-ECE-TW3/1762652580.434039", - "retrieved_timestamp": "1762652580.434039", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - 
}, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "paloalma/Le_Triomphant-ECE-TW3", - "developer": "paloalma", - "inference_platform": "unknown", - "id": "paloalma/Le_Triomphant-ECE-TW3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5402055435134332 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6112057897556996 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19486404833836857 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4725 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.476313164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 72.289 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/paloalma/paloalma_TW3-JRGL-v2/525f2e27-bd77-49e9-85db-61efddbdd186.json b/leaderboard_data/HFOpenLLMv2/paloalma/paloalma_TW3-JRGL-v2/525f2e27-bd77-49e9-85db-61efddbdd186.json deleted file mode 100644 index 3d5869922c0eeacc8026dd2046218d864e4d5f2b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/paloalma/paloalma_TW3-JRGL-v2/525f2e27-bd77-49e9-85db-61efddbdd186.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/paloalma_TW3-JRGL-v2/1762652580.43424", - "retrieved_timestamp": "1762652580.434241", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "paloalma/TW3-JRGL-v2", - "developer": "paloalma", - "inference_platform": "unknown", - "id": "paloalma/TW3-JRGL-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316127874040878 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on 
BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6137525505395743 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17900302114803626 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48583333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4857878989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 72.289 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_Al_Dente_v1_8b/9924f2bd-abe5-431c-aa06-be24952ca363.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_Al_Dente_v1_8b/9924f2bd-abe5-431c-aa06-be24952ca363.json deleted file mode 100644 index e85fb75be3dcd901b9ce32cf202e1d384beb7bd2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_Al_Dente_v1_8b/9924f2bd-abe5-431c-aa06-be24952ca363.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_Al_Dente_v1_8b/1762652580.434438", - "retrieved_timestamp": "1762652580.434439", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/Al_Dente_v1_8b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/Al_Dente_v1_8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3693721547715617 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48347371404380524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3987083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2859873670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_model_007_13b_v2/a108864f-40d6-492b-8440-1cbb5d87a5fe.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_model_007_13b_v2/a108864f-40d6-492b-8440-1cbb5d87a5fe.json deleted file mode 100644 index beaf1a1005ead63a49c95892d9d3a7ea103d97cb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_model_007_13b_v2/a108864f-40d6-492b-8440-1cbb5d87a5fe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_model_007_13b_v2/1762652580.434693", - "retrieved_timestamp": "1762652580.4346938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/model_007_13b_v2", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/model_007_13b_v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30564901129004374 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4702292766687601 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46109375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.24609375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_3b/bebbfd98-fdba-413d-9e7d-06af8bd4d5a7.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_3b/bebbfd98-fdba-413d-9e7d-06af8bd4d5a7.json deleted file mode 100644 index a1238a4562baddb117d96e960c309956d8585d3e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_3b/bebbfd98-fdba-413d-9e7d-06af8bd4d5a7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_3b/1762652580.434913", - "retrieved_timestamp": "1762652580.434913", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_3b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07421419611076388 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196070040004752 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3349270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11452792553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.426 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_7b/773c97e1-0e43-46ae-a134-8a08ca9b5094.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_7b/773c97e1-0e43-46ae-a134-8a08ca9b5094.json deleted file mode 100644 index 3b8637f57171f36d59d71ffad5484e5d9b62309d..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_7b/773c97e1-0e43-46ae-a134-8a08ca9b5094.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_7b/1762652580.435124", - "retrieved_timestamp": "1762652580.4351249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_7b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04121619525082337 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3332228472650342 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12458444148936171 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v2_7b/036c4f96-2d08-40a1-968d-293e0b3a1ed0.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v2_7b/036c4f96-2d08-40a1-968d-293e0b3a1ed0.json deleted file mode 100644 index 59209398a9d1c342affd989294ff39cf653ea4b8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v2_7b/036c4f96-2d08-40a1-968d-293e0b3a1ed0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v2_7b/1762652580.435575", - "retrieved_timestamp": "1762652580.435576", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v2_7b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v2_7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13578859647956312 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35363417847864514 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24916107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35933333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1541722074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v3_13b/d3ba7ff3-e0d7-48e3-b63d-9648a193679f.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v3_13b/d3ba7ff3-e0d7-48e3-b63d-9648a193679f.json deleted file mode 100644 index 8c6901ec8f91694dcb5e9fbd0201437403fdb456..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v3_13b/d3ba7ff3-e0d7-48e3-b63d-9648a193679f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v3_13b/1762652580.435779", - "retrieved_timestamp": "1762652580.43578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v3_13b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v3_13b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28966253983873896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4710970361474938 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45979166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23046875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v3_70b/beae9826-35b2-4758-a20a-10c8402daa42.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v3_70b/beae9826-35b2-4758-a20a-10c8402daa42.json deleted file mode 100644 index e01373dcae016217e809d7f8e672aaedd652220d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v3_70b/beae9826-35b2-4758-a20a-10c8402daa42.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v3_70b/1762652580.43598", - "retrieved_timestamp": "1762652580.435981", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v3_70b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v3_70b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4014703209705803 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5949312065598904 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179530201342282 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5078541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3757480053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v3_7b/69cb8c68-5847-48f0-b2bd-0756ec761837.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v3_7b/69cb8c68-5847-48f0-b2bd-0756ec761837.json deleted file mode 100644 index 86b4450329bd82cbb09c150f76c0064fb8e0e0a6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v3_7b/69cb8c68-5847-48f0-b2bd-0756ec761837.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v3_7b/1762652580.436181", - "retrieved_timestamp": "1762652580.436182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v3_7b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v3_7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2820937335159599 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4095332668279368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49823958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.20836103723404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v5_8b/12a231e0-deed-4d2b-9904-79a8b543d200.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v5_8b/12a231e0-deed-4d2b-9904-79a8b543d200.json deleted file mode 100644 index 3883087b986f8bab1fd97ef34d6934f9d82ed53d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v5_8b/12a231e0-deed-4d2b-9904-79a8b543d200.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v5_8b/1762652580.436376", - "retrieved_timestamp": "1762652580.436377", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v5_8b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v5_8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48060479527653294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5064242853619262 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4000104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3075964095744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v5_8b_dpo/1dad9bda-fbc8-499b-aab0-29be59b6921d.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v5_8b_dpo/1dad9bda-fbc8-499b-aab0-29be59b6921d.json deleted file mode 100644 index ad7102b9e2f120916eb31cd1af97b91b44bd62a8..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v5_8b_dpo/1dad9bda-fbc8-499b-aab0-29be59b6921d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v5_8b_dpo/1762652580.436573", - "retrieved_timestamp": "1762652580.436574", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v5_8b_dpo", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v5_8b_dpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48964746871633935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5074598658862709 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.389375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31158577127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v5_8b_orpo/cf3f79fc-1fe2-4b55-a808-5664cc1f1809.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v5_8b_orpo/cf3f79fc-1fe2-4b55-a808-5664cc1f1809.json deleted file mode 100644 index fd2704ca2d61d9ed33544dde72089e11b2a344fb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v5_8b_orpo/cf3f79fc-1fe2-4b55-a808-5664cc1f1809.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v5_8b_orpo/1762652580.436766", - "retrieved_timestamp": "1762652580.4367669", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v5_8b_orpo", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v5_8b_orpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08243239050164675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.496374377369289 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41312499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2947140957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v6_8b/e45a0914-baee-4fd4-a231-3495b18db9a9.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v6_8b/e45a0914-baee-4fd4-a231-3495b18db9a9.json deleted file mode 100644 index 1767992d6b488a87a869fc66ee17ef3525ac182f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v6_8b/e45a0914-baee-4fd4-a231-3495b18db9a9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v6_8b/1762652580.436963", - "retrieved_timestamp": "1762652580.436963", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v6_8b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v6_8b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011116060940526692 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30286959112076134 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0037764350453172208 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23825503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3554583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1124501329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v6_8b_dpo/3e875ab6-6065-4400-8038-0fe6437f44d5.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v6_8b_dpo/3e875ab6-6065-4400-8038-0fe6437f44d5.json deleted file mode 100644 index 7310f44476ea51e98e2c23f90fc65f74326fb972..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v6_8b_dpo/3e875ab6-6065-4400-8038-0fe6437f44d5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v6_8b_dpo/1762652580.43716", - "retrieved_timestamp": "1762652580.437161", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v6_8b_dpo", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v6_8b_dpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3882564927725103 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520280774453148 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06117824773413897 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40903125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.359624335106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v7_72b/702f1485-2941-4e27-9c96-11cee2449df8.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v7_72b/702f1485-2941-4e27-9c96-11cee2449df8.json deleted file mode 100644 index 1ef5db3306c6239a4c62b84a3f50c8c64b3192cd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v7_72b/702f1485-2941-4e27-9c96-11cee2449df8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v7_72b/1762652580.437353", - "retrieved_timestamp": "1762652580.437354", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v7_72b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v7_72b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5929622291076566 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6842301988001044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5070416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5621675531914894 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v7_7b/f801b633-5767-4b74-a0db-e474c9349820.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v7_7b/f801b633-5767-4b74-a0db-e474c9349820.json deleted file mode 100644 index 3d68c65631354127c6c4f267894a9aa71e2db618..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v7_7b/f801b633-5767-4b74-a0db-e474c9349820.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v7_7b/1762652580.437545", - "retrieved_timestamp": "1762652580.437546", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v7_7b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v7_7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387646998851935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5274909601771501 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43597916666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4167220744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v8_1_70b/02201ae1-ec65-496c-bfdb-0dec8aa5308d.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v8_1_70b/02201ae1-ec65-496c-bfdb-0dec8aa5308d.json deleted file mode 100644 index 
17ae6058ee7b75f1150200058bd07d83a83d1244..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v8_1_70b/02201ae1-ec65-496c-bfdb-0dec8aa5308d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v8_1_70b/1762652580.4377441", - "retrieved_timestamp": "1762652580.4377449", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v8_1_70b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v8_1_70b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8571434903832941 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6781305630707934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43288590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44370833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49833776595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_0_3B-Instruct/bc38a266-c3bd-4ecf-8149-6b26bb32803b.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_0_3B-Instruct/bc38a266-c3bd-4ecf-8149-6b26bb32803b.json deleted file mode 100644 index ddd4692f7ee19c970d2a97b3a3364099adc5e45a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_0_3B-Instruct/bc38a266-c3bd-4ecf-8149-6b26bb32803b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_0_3B-Instruct/1762652580.437941", - "retrieved_timestamp": "1762652580.437942", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_0_3B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_0_3B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5753766672429155 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4412946064233128 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36590625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2603058510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_1_1B-Instruct/65d0aca2-06ae-4a09-9fb2-2bb54939a554.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_1_1B-Instruct/65d0aca2-06ae-4a09-9fb2-2bb54939a554.json deleted file mode 100644 index 0c70775619628b77fd96333b44da1e296cc89887..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_1_1B-Instruct/65d0aca2-06ae-4a09-9fb2-2bb54939a554.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_1_1B-Instruct/1762652580.438177", - "retrieved_timestamp": "1762652580.438178", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_1_1B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_1_1B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3629270336041702 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3205118362595434 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04607250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3380625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13738364361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_2_14B/e10e45b8-0d37-4905-9ebf-acc7922b7ea3.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_2_14B/e10e45b8-0d37-4905-9ebf-acc7922b7ea3.json deleted file mode 100644 index d5573ac5e9b15ac788bf62a37e55a230d8d2076f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_2_14B/e10e45b8-0d37-4905-9ebf-acc7922b7ea3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_2_14B/1762652580.438377", - "retrieved_timestamp": "1762652580.438378", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_2_14B", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_2_14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7780588837617521 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6856329737542378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29531722054380666 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47030208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5255152925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_2_70b/69093327-3726-469d-9750-b9fa39423310.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_2_70b/69093327-3726-469d-9750-b9fa39423310.json deleted file mode 100644 index cc570a6d57062504fa1eacfbb1bbe65257c1cda4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_2_70b/69093327-3726-469d-9750-b9fa39423310.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_2_70b/1762652580.438577", - "retrieved_timestamp": "1762652580.438578", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_2_70b", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_2_70b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8382591523823455 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6744868732778627 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2938066465256798 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.47098958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48213098404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_4_70B/e3746ac6-3ee4-4d95-b800-509bed07aec3.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_4_70B/e3746ac6-3ee4-4d95-b800-509bed07aec3.json deleted file mode 100644 index fccb5070751940801d4bd0f162f9ea6395691227..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_4_70B/e3746ac6-3ee4-4d95-b800-509bed07aec3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_4_70B/1762652580.438774", - "retrieved_timestamp": "1762652580.438774", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_4_70B", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_4_70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8014645584826039 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6418899297276105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36577181208053694 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4647291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45362367021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_5_1B-Instruct/2f2f821b-037b-4f3f-87f6-16703c0dc61a.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_5_1B-Instruct/2f2f821b-037b-4f3f-87f6-16703c0dc61a.json deleted file mode 100644 index 36629e1ffce78094c78da54ab2c5e47367b30e29..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_5_1B-Instruct/2f2f821b-037b-4f3f-87f6-16703c0dc61a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_5_1B-Instruct/1762652580.438983", - "retrieved_timestamp": "1762652580.438984", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_5_1B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_5_1B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46379384477630464 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3337001077145985 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030211480362537766 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31815625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13696808510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_5_1B-Instruct_preview/7836190d-33df-45c2-b020-8ccec01de1f3.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_5_1B-Instruct_preview/7836190d-33df-45c2-b020-8ccec01de1f3.json deleted file mode 100644 index 91ca5fdf8dc9f1ec9ed3318522602c35b9d97d44..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_5_1B-Instruct_preview/7836190d-33df-45c2-b020-8ccec01de1f3.json +++ /dev/null @@ 
-1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_5_1B-Instruct_preview/1762652580.439178", - "retrieved_timestamp": "1762652580.439179", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_5_1B-Instruct_preview", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_5_1B-Instruct_preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3935768206137493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32769514793198123 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33945833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13272938829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_5_3B-Instruct/2ff28335-81a0-4d61-b221-a7edb877da4a.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_5_3B-Instruct/2ff28335-81a0-4d61-b221-a7edb877da4a.json deleted file mode 100644 index a2a5ea9104dfdd9671e800274b7514426287cc46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_5_3B-Instruct/2ff28335-81a0-4d61-b221-a7edb877da4a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_5_3B-Instruct/1762652580.439394", - "retrieved_timestamp": "1762652580.4393952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_5_3B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_5_3B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7207066140063919 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44963802133275826 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1321752265861027 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2869127516778524 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4269895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2882313829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_6_1B-Instruct/332f06db-35f1-4759-b3f8-973b1fe6fb9e.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_6_1B-Instruct/332f06db-35f1-4759-b3f8-973b1fe6fb9e.json deleted file mode 100644 index 61677f447c7ce807f43c42f557721054e1150474..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_6_1B-Instruct/332f06db-35f1-4759-b3f8-973b1fe6fb9e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_6_1B-Instruct/1762652580.439626", - "retrieved_timestamp": "1762652580.439627", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_6_1B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_6_1B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6085741388404988 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3561349568441982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0770392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33955208333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18085106382978725 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_6_3B-Instruct/1cc45753-aeed-4804-a6da-413437dbb940.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_6_3B-Instruct/1cc45753-aeed-4804-a6da-413437dbb940.json deleted file mode 100644 index e286fe444b3787a494edadc348d2ec03813f16c5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_6_3B-Instruct/1cc45753-aeed-4804-a6da-413437dbb940.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_6_3B-Instruct/1762652580.439853", - "retrieved_timestamp": "1762652580.439853", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_6_3B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_6_3B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7316475839660989 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45683272658133456 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13293051359516617 - } - 
}, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4067708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28507313829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_7_1B-Instruct/fad200e0-05bb-42d7-b7f3-caba938ca09d.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_7_1B-Instruct/fad200e0-05bb-42d7-b7f3-caba938ca09d.json deleted file mode 100644 index 19c14b901ed6a635334cd4f19d46fb04ba2c60f7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_7_1B-Instruct/fad200e0-05bb-42d7-b7f3-caba938ca09d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_7_1B-Instruct/1762652580.4400692", - "retrieved_timestamp": "1762652580.44007", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_7_1B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_7_1B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5610136659618701 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3181526961435924 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35269791666666667 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1344747340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_7_3B-Instruct/42a8b694-ef8f-47d2-8da3-e4db453641b3.json b/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_7_3B-Instruct/42a8b694-ef8f-47d2-8da3-e4db453641b3.json deleted file mode 100644 index ba68896cae635f49f112b95d3d9037c63d0619b3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pankajmathur/pankajmathur_orca_mini_v9_7_3B-Instruct/42a8b694-ef8f-47d2-8da3-e4db453641b3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pankajmathur_orca_mini_v9_7_3B-Instruct/1762652580.44028", - "retrieved_timestamp": "1762652580.4402812", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pankajmathur/orca_mini_v9_7_3B-Instruct", - "developer": "pankajmathur", - "inference_platform": "unknown", - "id": "pankajmathur/orca_mini_v9_7_3B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5618381450107935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3297133908231881 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.361875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13746675531914893 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/paulml/paulml_ECE-ILAB-Q1/83024ec4-e4a4-4dd3-adf4-654c90c3a271.json 
b/leaderboard_data/HFOpenLLMv2/paulml/paulml_ECE-ILAB-Q1/83024ec4-e4a4-4dd3-adf4-654c90c3a271.json deleted file mode 100644 index 3012fb1b961d6a5b4b73506617d119743c37950d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/paulml/paulml_ECE-ILAB-Q1/83024ec4-e4a4-4dd3-adf4-654c90c3a271.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/paulml_ECE-ILAB-Q1/1762652580.440484", - "retrieved_timestamp": "1762652580.440484", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "paulml/ECE-ILAB-Q1", - "developer": "paulml", - "inference_platform": "unknown", - "id": "paulml/ECE-ILAB-Q1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7864521691334547 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6717755530661759 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3557401812688822 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46137500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.550531914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pints-ai/pints-ai_1.5-Pints-16K-v0.1/8dff3ec1-066f-4f5f-ac57-879d693ee3fb.json b/leaderboard_data/HFOpenLLMv2/pints-ai/pints-ai_1.5-Pints-16K-v0.1/8dff3ec1-066f-4f5f-ac57-879d693ee3fb.json deleted file mode 100644 index 8dcfca1b38c00dfcecd9e84157ff1db1f9a9acc1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pints-ai/pints-ai_1.5-Pints-16K-v0.1/8dff3ec1-066f-4f5f-ac57-879d693ee3fb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pints-ai_1.5-Pints-16K-v0.1/1762652580.4407208", - "retrieved_timestamp": "1762652580.440722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pints-ai/1.5-Pints-16K-v0.1", - "developer": "pints-ai", - "inference_platform": "unknown", - "id": "pints-ai/1.5-Pints-16K-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1635914927946737 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3133077677150869 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23573825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.357875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1118683510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.566 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/pints-ai/pints-ai_1.5-Pints-2K-v0.1/2ed76213-e562-4b36-bf46-93f09df88ee9.json b/leaderboard_data/HFOpenLLMv2/pints-ai/pints-ai_1.5-Pints-2K-v0.1/2ed76213-e562-4b36-bf46-93f09df88ee9.json deleted file mode 100644 index dd565b0d32a8676db7325ebfd42e10c820df52c4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/pints-ai/pints-ai_1.5-Pints-2K-v0.1/2ed76213-e562-4b36-bf46-93f09df88ee9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/pints-ai_1.5-Pints-2K-v0.1/1762652580.4409652", - "retrieved_timestamp": "1762652580.440966", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "pints-ai/1.5-Pints-2K-v0.1", - "developer": "pints-ai", - "inference_platform": "unknown", - "id": "pints-ai/1.5-Pints-2K-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.17615593292463996 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29801943389750435 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35018749999999993 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11037234042553191 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.566 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/piotr25691/piotr25691_thea-3b-25r/d8fefd3b-78e6-472e-854c-15f40ace7878.json b/leaderboard_data/HFOpenLLMv2/piotr25691/piotr25691_thea-3b-25r/d8fefd3b-78e6-472e-854c-15f40ace7878.json deleted file mode 100644 index 28a053e0ddd9cb130c58c4b087e8d33670ce76cf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/piotr25691/piotr25691_thea-3b-25r/d8fefd3b-78e6-472e-854c-15f40ace7878.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/piotr25691_thea-3b-25r/1762652580.44117", - "retrieved_timestamp": "1762652580.441171", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "piotr25691/thea-3b-25r", - "developer": "piotr25691", - "inference_platform": "unknown", - "id": "piotr25691/thea-3b-25r" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7344202272193336 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44844100293649863 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1782477341389728 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33145833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3182347074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/piotr25691/piotr25691_thea-c-3b-25r/828bcb36-3902-4157-9323-a5dcf592a795.json b/leaderboard_data/HFOpenLLMv2/piotr25691/piotr25691_thea-c-3b-25r/828bcb36-3902-4157-9323-a5dcf592a795.json deleted file mode 100644 index f0b23ec88e086f1fca72aeeb5c6fa2e2948434c1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/piotr25691/piotr25691_thea-c-3b-25r/828bcb36-3902-4157-9323-a5dcf592a795.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/piotr25691_thea-c-3b-25r/1762652580.441559", - "retrieved_timestamp": "1762652580.441561", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "piotr25691/thea-c-3b-25r", - "developer": "piotr25691", - "inference_platform": "unknown", - "id": "piotr25691/thea-c-3b-25r" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7401904723910335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4532410175874399 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15256797583081572 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33148958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3178191489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/piotr25691/piotr25691_thea-rp-3b-25r/cd34091b-2639-476c-8419-e6c327cfabc7.json b/leaderboard_data/HFOpenLLMv2/piotr25691/piotr25691_thea-rp-3b-25r/cd34091b-2639-476c-8419-e6c327cfabc7.json deleted file mode 100644 index ec9258bd6d92100f06359571554b4240c7195310..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/piotr25691/piotr25691_thea-rp-3b-25r/cd34091b-2639-476c-8419-e6c327cfabc7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/piotr25691_thea-rp-3b-25r/1762652580.441917", - "retrieved_timestamp": "1762652580.441918", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "piotr25691/thea-rp-3b-25r", - "developer": "piotr25691", - "inference_platform": "unknown", - "id": "piotr25691/thea-rp-3b-25r" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6577835698169745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4390291036559586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13217522658610273 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.381875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30601728723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prince-canuma/prince-canuma_Ministral-8B-Instruct-2410-HF/f98bc033-55c9-45c1-a101-3881507bb733.json b/leaderboard_data/HFOpenLLMv2/prince-canuma/prince-canuma_Ministral-8B-Instruct-2410-HF/f98bc033-55c9-45c1-a101-3881507bb733.json deleted file mode 100644 index 
2f6e1738e7caa6c92749c3a6e8245b4a539cf5d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prince-canuma/prince-canuma_Ministral-8B-Instruct-2410-HF/f98bc033-55c9-45c1-a101-3881507bb733.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prince-canuma_Ministral-8B-Instruct-2410-HF/1762652580.442474", - "retrieved_timestamp": "1762652580.442475", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prince-canuma/Ministral-8B-Instruct-2410-HF", - "developer": "prince-canuma", - "inference_platform": "unknown", - "id": "prince-canuma/Ministral-8B-Instruct-2410-HF" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5911636679565775 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4585611339334732 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19184290030211482 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32978723404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.02 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-8B-ProLong-512k-Instruct/72eccc9b-df63-4b2f-8975-a1c89940802c.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-8B-ProLong-512k-Instruct/72eccc9b-df63-4b2f-8975-a1c89940802c.json deleted file mode 100644 index 9997aa1b2f3dae9c6d5c4b44fddc8694a9bd4893..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-8B-ProLong-512k-Instruct/72eccc9b-df63-4b2f-8975-a1c89940802c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-512k-Instruct/1762652580.4434712", - "retrieved_timestamp": "1762652580.443472", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3977734632996006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49830327201612584 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3246343085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-8B-ProLong-512k-Instruct/e30fead2-6516-480f-abd8-6ad0713cb053.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-8B-ProLong-512k-Instruct/e30fead2-6516-480f-abd8-6ad0713cb053.json deleted file mode 100644 index 0ede079bfb917f7f6a94ab6959eeec939b0cc8d9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-8B-ProLong-512k-Instruct/e30fead2-6516-480f-abd8-6ad0713cb053.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-512k-Instruct/1762652580.4431858", - "retrieved_timestamp": "1762652580.443187", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": 
"princeton-nlp/Llama-3-8B-ProLong-512k-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5508218194390884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5028310716285619 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42664583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32313829787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-8B-ProLong-64k-Instruct/9c801b4e-228b-42a8-a7f7-ea2bf125d716.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-8B-ProLong-64k-Instruct/9c801b4e-228b-42a8-a7f7-ea2bf125d716.json deleted file mode 100644 index c5da9955fc63c83abe07d8a546f28ce9467ccf55..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-8B-ProLong-64k-Instruct/9c801b4e-228b-42a8-a7f7-ea2bf125d716.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-8B-ProLong-64k-Instruct/1762652580.443907", - "retrieved_timestamp": "1762652580.4439082", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-8B-ProLong-64k-Instruct", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-8B-ProLong-64k-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5563172382611471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5083040804243396 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2953020134228188 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43969791666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32746010638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-CPO/342c7c0f-92f0-4296-8e0a-519724133bb5.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-CPO/342c7c0f-92f0-4296-8e0a-519724133bb5.json deleted file mode 100644 index ea5dfb4702e13eab832b29d7fd1a1d34c79d39be..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-CPO/342c7c0f-92f0-4296-8e0a-519724133bb5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-CPO/1762652580.444415", - "retrieved_timestamp": "1762652580.444416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-CPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-CPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37034623687371726 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4594875922440002 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3608541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2976230053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-DPO/8afa4f43-96fb-46b1-84e8-bf98928aa484.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-DPO/8afa4f43-96fb-46b1-84e8-bf98928aa484.json deleted file mode 100644 index 197310cad30f12b90c5bc203c29dbf2caca174ea..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-DPO/8afa4f43-96fb-46b1-84e8-bf98928aa484.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-DPO/1762652580.444683", - "retrieved_timestamp": "1762652580.444684", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-DPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41111251479407973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46658506064913546 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38673958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3078457446808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-IPO/71d5525f-c257-4b88-b84d-d75b3a8328fc.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-IPO/71d5525f-c257-4b88-b84d-d75b3a8328fc.json deleted file mode 100644 index 8d9234eb1fa2ae1ec0e33a5a8c87af7cc54a8a60..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-IPO/71d5525f-c257-4b88-b84d-d75b3a8328fc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-IPO/1762652580.444937", - "retrieved_timestamp": "1762652580.444937", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-IPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-IPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4486562321307464 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4690068582318399 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3919479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3115026595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-KTO/6c0d909f-ee4f-4e1a-8db9-abf1920359ed.json 
b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-KTO/6c0d909f-ee4f-4e1a-8db9-abf1920359ed.json deleted file mode 100644 index e127fd4119839ab3cf4fc11bc1883871729a6f51..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-KTO/6c0d909f-ee4f-4e1a-8db9-abf1920359ed.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-KTO/1762652580.4452229", - "retrieved_timestamp": "1762652580.445225", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-KTO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-KTO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4522533544329047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4692852292721417 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.052870090634441085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3841979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3054355053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-ORPO/ba821a1c-3b8e-4952-9f7b-b1f18923c4e7.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-ORPO/ba821a1c-3b8e-4952-9f7b-b1f18923c4e7.json deleted file mode 100644 index 645b2c79884dd832a02cc1bff7e81f6c78258d03..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-ORPO/ba821a1c-3b8e-4952-9f7b-b1f18923c4e7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-ORPO/1762652580.445469", - 
"retrieved_timestamp": "1762652580.4454699", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-ORPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-ORPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45165383404921167 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47340573024653915 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3706770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30826130319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-RDPO/985ac874-e7eb-4431-81c2-a79f3865c696.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-RDPO/985ac874-e7eb-4431-81c2-a79f3865c696.json deleted file mode 100644 index 7cb36b1ef47cdfd967d792386824f8ec502a48af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-RDPO/985ac874-e7eb-4431-81c2-a79f3865c696.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-RDPO/1762652580.445683", - "retrieved_timestamp": "1762652580.445684", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-RDPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - 
"id": "princeton-nlp/Llama-3-Base-8B-SFT-RDPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4480068440626427 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46620140448752295 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05740181268882175 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3062080536912752 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4027395833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30144614361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-RRHF/cc9fb769-3d0b-4e53-9942-d4f99203a629.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-RRHF/cc9fb769-3d0b-4e53-9942-d4f99203a629.json deleted file mode 100644 index 60e21e9e9c9cbbc453771876b88c08671c4625c0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-RRHF/cc9fb769-3d0b-4e53-9942-d4f99203a629.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-RRHF/1762652580.445896", - "retrieved_timestamp": "1762652580.445896", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-RRHF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-RRHF" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3357247658435174 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.4520360167602379 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37222916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2888962765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-SLiC-HF/596f4d11-f091-42c3-9f1e-b95e0ba6dbd9.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-SLiC-HF/596f4d11-f091-42c3-9f1e-b95e0ba6dbd9.json deleted file mode 100644 index 3434be841a487713132c1c55a96a9f5a7bde49d5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-SLiC-HF/596f4d11-f091-42c3-9f1e-b95e0ba6dbd9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-SLiC-HF/1762652580.4460979", - "retrieved_timestamp": "1762652580.446099", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4890479483326463 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4704075127777334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40909375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30634973404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-SimPO/314cfcd7-674a-49d2-adf5-6d45c30e2382.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-SimPO/314cfcd7-674a-49d2-adf5-6d45c30e2382.json deleted file mode 100644 index 744a098d2b796369d3aaad66fe382528fff59d6b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT-SimPO/314cfcd7-674a-49d2-adf5-6d45c30e2382.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT-SimPO/1762652580.446312", - "retrieved_timestamp": "1762652580.446312", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT-SimPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT-SimPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4685401401614383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47412507033960827 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41268750000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31050531914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT/494df3f9-7ce9-4f81-99c4-e6100d6e4187.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT/494df3f9-7ce9-4f81-99c4-e6100d6e4187.json deleted file mode 100644 index 3be46eca1da5013306e553d158223dcaed973ac7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Base-8B-SFT/494df3f9-7ce9-4f81-99c4-e6100d6e4187.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Base-8B-SFT/1762652580.444184", - "retrieved_timestamp": "1762652580.444185", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Base-8B-SFT", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Base-8B-SFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27959591661236627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.464303802632615 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4117916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3093417553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-CPO-v0.2/2de21869-2851-43f8-b5c3-a4b9e0e6e3ac.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-CPO-v0.2/2de21869-2851-43f8-b5c3-a4b9e0e6e3ac.json deleted file mode 
100644 index 92bd2dd8b2de42562814f5ae2c3d1f7387b4dc01..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-CPO-v0.2/2de21869-2851-43f8-b5c3-a4b9e0e6e3ac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-CPO-v0.2/1762652580.44678", - "retrieved_timestamp": "1762652580.446781", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7505817896514582 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5026669871217129 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10800604229607251 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36190625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37059507978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-CPO/95eb37c8-2a58-45e3-bd86-2c305e3cb5dd.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-CPO/95eb37c8-2a58-45e3-bd86-2c305e3cb5dd.json deleted file mode 100644 index 2ac326aa363329178fe9f9d66c799850fbf0bd30..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-CPO/95eb37c8-2a58-45e3-bd86-2c305e3cb5dd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-CPO/1762652580.4465249", - "retrieved_timestamp": "1762652580.446526", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-CPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-CPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7292993701157373 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4998793158888361 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35139583333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36519281914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-DPO-v0.2/6ae028c9-19d9-447b-93c1-c4548aef84f9.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-DPO-v0.2/6ae028c9-19d9-447b-93c1-c4548aef84f9.json deleted file mode 100644 index 67cfe7ddd7fba8f299015ac67cb70dfb434c1763..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-DPO-v0.2/6ae028c9-19d9-447b-93c1-c4548aef84f9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-DPO-v0.2/1762652580.447217", - "retrieved_timestamp": "1762652580.447217", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2" 
- }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7208063493752133 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.505620320855615 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08987915407854985 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3844479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37691156914893614 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-DPO/81c7a3df-7e92-4efa-a323-51ea3e0a4fa6.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-DPO/81c7a3df-7e92-4efa-a323-51ea3e0a4fa6.json deleted file mode 100644 index 8a27fb91815f75ee64859ccd4dcbf20a78ca4c03..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-DPO/81c7a3df-7e92-4efa-a323-51ea3e0a4fa6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-DPO/1762652580.447003", - "retrieved_timestamp": "1762652580.447003", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-DPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6757436934001781 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.4991303079139502 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27181208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37381250000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36652260638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-KTO-v0.2/5f35c42b-2d34-42bc-b94e-127a678cad2c.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-KTO-v0.2/5f35c42b-2d34-42bc-b94e-127a678cad2c.json deleted file mode 100644 index cbfd096e9c1cd9d17340749dfc11e60bf2d0d3b2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-KTO-v0.2/5f35c42b-2d34-42bc-b94e-127a678cad2c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-KTO-v0.2/1762652580.447652", - "retrieved_timestamp": "1762652580.447653", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7290245437660962 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5079766897761946 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, 
- "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3667719414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-KTO/e8602fbb-422c-464e-87f4-79c6e1a4afcf.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-KTO/e8602fbb-422c-464e-87f4-79c6e1a4afcf.json deleted file mode 100644 index d081bea14ad8ee46ecf3180361f86429e42bff29..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-KTO/e8602fbb-422c-464e-87f4-79c6e1a4afcf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-KTO/1762652580.4474308", - "retrieved_timestamp": "1762652580.447432", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-KTO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-KTO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6864098370102439 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4981903187457697 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36984374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.35987367021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-ORPO-v0.2/28bf3b2a-6c0c-4994-aaf5-80b67d82a955.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-ORPO-v0.2/28bf3b2a-6c0c-4994-aaf5-80b67d82a955.json deleted file mode 100644 index 1361dbbc7404ecb0adcc08eeb49ec86e112d348b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-ORPO-v0.2/28bf3b2a-6c0c-4994-aaf5-80b67d82a955.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-ORPO-v0.2/1762652580.448072", - "retrieved_timestamp": "1762652580.448073", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7633213207622442 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.507835231782556 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37796874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37308843085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-ORPO/8789e9aa-5cfb-4eca-9795-540c5a9b4bb4.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-ORPO/8789e9aa-5cfb-4eca-9795-540c5a9b4bb4.json deleted file mode 100644 index 
c301a8aa39459c860f9559fac2e34611fd6e0fc5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-ORPO/8789e9aa-5cfb-4eca-9795-540c5a9b4bb4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-ORPO/1762652580.447865", - "retrieved_timestamp": "1762652580.4478662", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-ORPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-ORPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.712813113649561 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5001206199104097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35018750000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36461103723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-RDPO-v0.2/1c3ea099-8b3b-4184-9f30-e7cdeea8f24e.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-RDPO-v0.2/1c3ea099-8b3b-4184-9f30-e7cdeea8f24e.json deleted file mode 100644 index 74893411516079b82a3a72ecc02ca75ad83f1eb3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-RDPO-v0.2/1c3ea099-8b3b-4184-9f30-e7cdeea8f24e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RDPO-v0.2/1762652580.448503", - "retrieved_timestamp": "1762652580.448504", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7076922565459647 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5049218189829557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3804479166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37741023936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-RDPO/041d45dd-c371-4e9c-9cda-a63e3d7a1b2d.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-RDPO/041d45dd-c371-4e9c-9cda-a63e3d7a1b2d.json deleted file mode 100644 index ecf3cdff5453191339685b1b85979fde63b9b101..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-RDPO/041d45dd-c371-4e9c-9cda-a63e3d7a1b2d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RDPO/1762652580.448289", - "retrieved_timestamp": "1762652580.44829", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-RDPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-RDPO" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6660017642078574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5033626077797596 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3752083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36070478723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-RRHF-v0.2/bc221748-c03b-4fee-9147-8f63b0017f0c.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-RRHF-v0.2/bc221748-c03b-4fee-9147-8f63b0017f0c.json deleted file mode 100644 index 8db1ef1b57afb04e1be469231bce2f43affa76de..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-RRHF-v0.2/bc221748-c03b-4fee-9147-8f63b0017f0c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RRHF-v0.2/1762652580.4489532", - "retrieved_timestamp": "1762652580.448954", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.712488419615509 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.49838952572927536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3482380319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-RRHF/e93eff52-c6e1-474e-8089-f672000fe1e5.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-RRHF/e93eff52-c6e1-474e-8089-f672000fe1e5.json deleted file mode 100644 index 783e002ccdb8f781e5b0043a092d659f8354900c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-RRHF/e93eff52-c6e1-474e-8089-f672000fe1e5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-RRHF/1762652580.4487302", - "retrieved_timestamp": "1762652580.448731", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-RRHF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-RRHF" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7274509412802475 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49105468765647214 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": 
{ - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3475520833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36436170212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-SLiC-HF-v0.2/5a5746dd-0270-4151-b774-8eaa6860d5e0.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-SLiC-HF-v0.2/5a5746dd-0270-4151-b774-8eaa6860d5e0.json deleted file mode 100644 index 92ac74a92b1c94f78ee9afa54f1217b0455579a4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-SLiC-HF-v0.2/5a5746dd-0270-4151-b774-8eaa6860d5e0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SLiC-HF-v0.2/1762652580.4493709", - "retrieved_timestamp": "1762652580.4493718", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7109646848140712 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49838952572927536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3482380319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-SLiC-HF/aaa9cd01-cca9-489c-91e0-79ff026eb258.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-SLiC-HF/aaa9cd01-cca9-489c-91e0-79ff026eb258.json deleted file mode 100644 index 8375e292a48638319ca06ec643b4922973561f65..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-SLiC-HF/aaa9cd01-cca9-489c-91e0-79ff026eb258.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SLiC-HF/1762652580.449163", - "retrieved_timestamp": "1762652580.449164", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7399655137258031 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5029422936734547 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3722916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35846077127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-SimPO-v0.2/5e499da1-f8c1-4830-828c-7d4013ea0243.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-SimPO-v0.2/5e499da1-f8c1-4830-828c-7d4013ea0243.json 
deleted file mode 100644 index e99041135d0e1bd5e26fb6bcaa80aaf87c207ec5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-SimPO-v0.2/5e499da1-f8c1-4830-828c-7d4013ea0243.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SimPO-v0.2/1762652580.44994", - "retrieved_timestamp": "1762652580.449941", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6808645505037745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.503833834044343 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3988020833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36220079787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-SimPO/fcd2c5e3-ebfd-4c1c-ac8a-d28ec08f1bf2.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-SimPO/fcd2c5e3-ebfd-4c1c-ac8a-d28ec08f1bf2.json deleted file mode 100644 index fbea90fc07bf0b72fbe4f8d8ff50e92d76ade28a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Llama-3-Instruct-8B-SimPO/fcd2c5e3-ebfd-4c1c-ac8a-d28ec08f1bf2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Llama-3-Instruct-8B-SimPO/1762652580.449708", - "retrieved_timestamp": "1762652580.449709", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Llama-3-Instruct-8B-SimPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Llama-3-Instruct-8B-SimPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6503898544750152 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48446848524905367 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39483333333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3489029255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-CPO/4c2ab1ed-8177-4518-ae3d-754f9711369d.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-CPO/4c2ab1ed-8177-4518-ae3d-754f9711369d.json deleted file mode 100644 index a64f55258d1f956dcb8ac7c454a166a6d75f7c74..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-CPO/4c2ab1ed-8177-4518-ae3d-754f9711369d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-CPO/1762652580.45017", - "retrieved_timestamp": "1762652580.450171", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-CPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-CPO" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46549267055856236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43821512506663574 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26512632978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-DPO/133d7669-db7f-47b6-b838-51b9577a9e68.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-DPO/133d7669-db7f-47b6-b838-51b9577a9e68.json deleted file mode 100644 index 8b8dba23e911f0a6f55fc7880e6754fe23c22a9e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-DPO/133d7669-db7f-47b6-b838-51b9577a9e68.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-DPO/1762652580.450392", - "retrieved_timestamp": "1762652580.4503932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-DPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44033830237104216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.43501123979612694 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41222916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26454454787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-IPO/b402d383-b80e-4cd9-b2ec-a1e435f67ac5.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-IPO/b402d383-b80e-4cd9-b2ec-a1e435f67ac5.json deleted file mode 100644 index bd624ee84b96935fbb1d42d571bf48bafc2e80fa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-IPO/b402d383-b80e-4cd9-b2ec-a1e435f67ac5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-IPO/1762652580.4506009", - "retrieved_timestamp": "1762652580.450602", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-IPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-IPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48295300912689443 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4458024605899282 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37762500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2791722074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-KTO/a0048817-4f45-4bca-ac1a-b7e0c25bd7ab.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-KTO/a0048817-4f45-4bca-ac1a-b7e0c25bd7ab.json deleted file mode 100644 index 1a24da1d0abc06125d0d286ee9b8ed32aa95b7df..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-KTO/a0048817-4f45-4bca-ac1a-b7e0c25bd7ab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-KTO/1762652580.450817", - "retrieved_timestamp": "1762652580.450818", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-KTO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-KTO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.478481540091402 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44764334464528677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43678124999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.28715093085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-RDPO/034fa9fa-4103-428d-a50e-b117ef5e0726.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-RDPO/034fa9fa-4103-428d-a50e-b117ef5e0726.json deleted file mode 100644 index 69584298b732fe9ccc216f7928bf66ccd7bc802c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-RDPO/034fa9fa-4103-428d-a50e-b117ef5e0726.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-RDPO/1762652580.451031", - "retrieved_timestamp": "1762652580.4510322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-RDPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-RDPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46064663980460735 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44395328626924213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02190332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3579375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27767619680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-RRHF/fbbd671a-3005-448a-bc15-718ba23bcf72.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-RRHF/fbbd671a-3005-448a-bc15-718ba23bcf72.json deleted file mode 100644 index 
ecfe62500920c9cd4fbfa93a29c1fb34b600549b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-RRHF/fbbd671a-3005-448a-bc15-718ba23bcf72.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-RRHF/1762652580.451245", - "retrieved_timestamp": "1762652580.451246", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-RRHF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-RRHF" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44066299640509404 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42805937403716016 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23977726063829788 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-SLiC-HF/2c28dcd3-af20-41ab-9234-a8296ecc98c0.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-SLiC-HF/2c28dcd3-af20-41ab-9234-a8296ecc98c0.json deleted file mode 100644 index 07378042822804d87af8265818ef06741f6af131..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-SLiC-HF/2c28dcd3-af20-41ab-9234-a8296ecc98c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-SLiC-HF/1762652580.451465", - "retrieved_timestamp": "1762652580.451466", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5127284494031392 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44223991890402176 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.035498489425981876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42608333333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2780917553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-SimPO/9bed5ccb-35c0-40e1-89b8-617656787052.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-SimPO/9bed5ccb-35c0-40e1-89b8-617656787052.json deleted file mode 100644 index 12fb70412722324042541f7285a16e07f7c0dff2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Base-SFT-SimPO/9bed5ccb-35c0-40e1-89b8-617656787052.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Base-SFT-SimPO/1762652580.4516768", - "retrieved_timestamp": "1762652580.451678", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Base-SFT-SimPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Base-SFT-SimPO" - 
}, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47006387496287627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4398050727924064 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39706250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27019614361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-CPO/259a0166-2ee3-409a-85ce-963d90d05ae7.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-CPO/259a0166-2ee3-409a-85ce-963d90d05ae7.json deleted file mode 100644 index bf2cc1a03798460079e6c5fdf7ae7294af74bd51..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-CPO/259a0166-2ee3-409a-85ce-963d90d05ae7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-CPO/1762652580.4518862", - "retrieved_timestamp": "1762652580.4518871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-CPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-CPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4203047912871182 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.406922267565148 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41784375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701130319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-DPO/0df26c01-7fae-4254-8e97-e03c6078d861.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-DPO/0df26c01-7fae-4254-8e97-e03c6078d861.json deleted file mode 100644 index 6da186d998911de7c0d04450d289773f9748922b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-DPO/0df26c01-7fae-4254-8e97-e03c6078d861.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-DPO/1762652580.4521", - "retrieved_timestamp": "1762652580.4521", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-DPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.517624347841505 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4060358459697702 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.030966767371601207 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3833333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2748503989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-IPO/fed6b773-040e-409b-884e-a97a1abfedc0.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-IPO/fed6b773-040e-409b-884e-a97a1abfedc0.json deleted file mode 100644 index db9b520e4a2c2da55f7a3e0cb22968ce83aca114..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-IPO/fed6b773-040e-409b-884e-a97a1abfedc0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-IPO/1762652580.45231", - "retrieved_timestamp": "1762652580.45231", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-IPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-IPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4929198969844457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4322183023180588 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43241666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2707779255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-KTO/ff079687-4519-4f0b-bb1e-2b447cb2b4c9.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-KTO/ff079687-4519-4f0b-bb1e-2b447cb2b4c9.json deleted file mode 100644 index 279fa85136844912441e9c3de4bb88e3a15ad76c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-KTO/ff079687-4519-4f0b-bb1e-2b447cb2b4c9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-KTO/1762652580.452526", - "retrieved_timestamp": "1762652580.452527", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-KTO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-KTO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4907966417993147 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4139586477181159 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3952708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28125 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-ORPO/36735132-1510-42cf-a68a-c46507f52edb.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-ORPO/36735132-1510-42cf-a68a-c46507f52edb.json deleted file mode 100644 index 
6a013df619ef2f7d636f475a53993614853698bd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-ORPO/36735132-1510-42cf-a68a-c46507f52edb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-ORPO/1762652580.452744", - "retrieved_timestamp": "1762652580.452745", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-ORPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-ORPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4719621714827768 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41040615756566107 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3912395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2662067819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-RDPO/9989efbb-bd01-4c7c-bf30-67fa81698906.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-RDPO/9989efbb-bd01-4c7c-bf30-67fa81698906.json deleted file mode 100644 index 71434b79d1cda5cebc4c2330ac5493ad8598b741..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-RDPO/9989efbb-bd01-4c7c-bf30-67fa81698906.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-RDPO/1762652580.452956", - "retrieved_timestamp": "1762652580.452957", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-RDPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-RDPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4887232542985944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40501479745073615 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27767619680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-RRHF/0a5ce684-675e-4fbe-b141-df12903228f9.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-RRHF/0a5ce684-675e-4fbe-b141-df12903228f9.json deleted file mode 100644 index 9c52fe1dceb3fa29f9b4ff0be4cd90feab46de09..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-RRHF/0a5ce684-675e-4fbe-b141-df12903228f9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-RRHF/1762652580.4531672", - "retrieved_timestamp": "1762652580.4531682", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-RRHF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-RRHF" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49601723427173233 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41897663476657404 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.397875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26512632978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-SLiC-HF/8b5493df-86fd-495a-8dce-9c5398795fc9.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-SLiC-HF/8b5493df-86fd-495a-8dce-9c5398795fc9.json deleted file mode 100644 index 918b74b77629ff95dcaadb3002e6d307dfc3afb7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-SLiC-HF/8b5493df-86fd-495a-8dce-9c5398795fc9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-SLiC-HF/1762652580.453388", - "retrieved_timestamp": "1762652580.4533892", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-SLiC-HF", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-SLiC-HF" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5115294086357531 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040013641288438 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39130208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27152593085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-SimPO/a3d0b6ec-e2be-4ca5-b083-df3c7ea0b385.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-SimPO/a3d0b6ec-e2be-4ca5-b083-df3c7ea0b385.json deleted file mode 100644 index 62eb7a95b8b7a9384bc125f1ddec5de6db700ab7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_Mistral-7B-Instruct-SimPO/a3d0b6ec-e2be-4ca5-b083-df3c7ea0b385.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_Mistral-7B-Instruct-SimPO/1762652580.45361", - "retrieved_timestamp": "1762652580.45361", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/Mistral-7B-Instruct-SimPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/Mistral-7B-Instruct-SimPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4686897432146704 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4507226157033399 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40978125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2796708776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_gemma-2-9b-it-DPO/5ed0019b-dc1e-4dd8-82e5-2d4cdb28beb9.json b/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_gemma-2-9b-it-DPO/5ed0019b-dc1e-4dd8-82e5-2d4cdb28beb9.json deleted file mode 100644 index a6a163dcf153d914ab3cad979c705d6753ef16fd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/princeton-nlp/princeton-nlp_gemma-2-9b-it-DPO/5ed0019b-dc1e-4dd8-82e5-2d4cdb28beb9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/princeton-nlp_gemma-2-9b-it-DPO/1762652580.454305", - "retrieved_timestamp": "1762652580.4543061", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "princeton-nlp/gemma-2-9b-it-DPO", - "developer": "princeton-nlp", - "inference_platform": "unknown", - "id": "princeton-nlp/gemma-2-9b-it-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27687203287277756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5941444682956648 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723404255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Bellatrix-1.5B-xElite/7f1c6c88-823f-4597-9794-bf05c076d4d3.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Bellatrix-1.5B-xElite/7f1c6c88-823f-4597-9794-bf05c076d4d3.json deleted file mode 100644 index da26d1444aac5cbf0c3644ada3c01e13d7c447d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Bellatrix-1.5B-xElite/7f1c6c88-823f-4597-9794-bf05c076d4d3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Bellatrix-1.5B-xElite/1762652580.4551811", - "retrieved_timestamp": "1762652580.455182", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Bellatrix-1.5B-xElite", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Bellatrix-1.5B-xElite" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1964144026737944 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35011984799236834 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36190625000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1657247340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Bellatrix-Tiny-1.5B-R1/4e78f82e-aa31-414c-9c59-9c8e318fff17.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Bellatrix-Tiny-1.5B-R1/4e78f82e-aa31-414c-9c59-9c8e318fff17.json deleted file mode 100644 index ec499733fcadaa99bc59cbf970e49f69e28864d7..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Bellatrix-Tiny-1.5B-R1/4e78f82e-aa31-414c-9c59-9c8e318fff17.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Bellatrix-Tiny-1.5B-R1/1762652580.455581", - "retrieved_timestamp": "1762652580.455582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Bellatrix-Tiny-1.5B-R1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Bellatrix-Tiny-1.5B-R1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33522498082864577 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40221745714531076 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3682916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27509973404255317 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Bellatrix-Tiny-1B-v2/715be726-e0e3-4589-91cf-85e41dbcbf8a.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Bellatrix-Tiny-1B-v2/715be726-e0e3-4589-91cf-85e41dbcbf8a.json deleted file mode 100644 index 3d929cc3a77b39af548c6de20d0c7d6fd2edb216..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Bellatrix-Tiny-1B-v2/715be726-e0e3-4589-91cf-85e41dbcbf8a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Bellatrix-Tiny-1B-v2/1762652580.4558249", - "retrieved_timestamp": "1762652580.4558249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" 
- }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Bellatrix-Tiny-1B-v2", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Bellatrix-Tiny-1B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15095169705270903 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3267684418723903 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028700906344410877 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34302083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14926861702127658 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Blaze-14B-xElite/c4041b70-acce-4088-a3b9-299d4424e240.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Blaze-14B-xElite/c4041b70-acce-4088-a3b9-299d4424e240.json deleted file mode 100644 index 0ef70497b39a4d352af8ef60ba1e362f1e605c4a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Blaze-14B-xElite/c4041b70-acce-4088-a3b9-299d4424e240.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Blaze-14B-xElite/1762652580.456049", - "retrieved_timestamp": "1762652580.45605", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Blaze-14B-xElite", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Blaze-14B-xElite" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.03632029681245762 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6627817236091689 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3693353474320242 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46248958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5111369680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_COCO-7B-Instruct-1M/a7b425bc-9160-44ed-abf1-18c3b84cede2.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_COCO-7B-Instruct-1M/a7b425bc-9160-44ed-abf1-18c3b84cede2.json deleted file mode 100644 index ce61b03b4afd0d4f1c4cedf95a312735fbdc8d2b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_COCO-7B-Instruct-1M/a7b425bc-9160-44ed-abf1-18c3b84cede2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_COCO-7B-Instruct-1M/1762652580.456335", - "retrieved_timestamp": "1762652580.456337", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/COCO-7B-Instruct-1M", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/COCO-7B-Instruct-1M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4743103853331383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5409956853800891 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3496978851963746 - } - 
}, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4382395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41863364361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite-1M/0c883e9c-4cec-4c65-aa10-96e0d0de2e1f.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite-1M/0c883e9c-4cec-4c65-aa10-96e0d0de2e1f.json deleted file mode 100644 index c00033ca08f518bc778b9968a30dbb8778829d30..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite-1M/0c883e9c-4cec-4c65-aa10-96e0d0de2e1f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite-1M/1762652580.457102", - "retrieved_timestamp": "1762652580.457103", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite-1M", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite-1M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5612884923115112 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6329399079569701 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523489932885906 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46760416666666665 - } - }, - { 
- "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5152094414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite-Stock/74d10ea5-3d08-4bb2-9246-5e053eb20fea.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite-Stock/74d10ea5-3d08-4bb2-9246-5e053eb20fea.json deleted file mode 100644 index 33475730e415cfc2618aa80768a3a186ce849ed6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite-Stock/74d10ea5-3d08-4bb2-9246-5e053eb20fea.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite-Stock/1762652580.457346", - "retrieved_timestamp": "1762652580.4573472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite-Stock", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite-Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.614294516327788 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6328767168557433 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46676737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48075 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5284242021276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite/487e1883-01c6-4714-9447-67837c78655b.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite/487e1883-01c6-4714-9447-67837c78655b.json deleted file mode 100644 index 73f723aff7db3aca80d4810d21b05b5b1aceae40..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite/487e1883-01c6-4714-9447-67837c78655b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite/1762652580.456628", - "retrieved_timestamp": "1762652580.456629", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6051521075191603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6317361472468987 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4788519637462236 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4859583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5301695478723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite/79bccc27-27a0-4194-9c46-5e89b0f21b9e.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite/79bccc27-27a0-4194-9c46-5e89b0f21b9e.json deleted file mode 100644 index 63ab78ed9cd60c95dc11c7ad15c2174d969d1c60..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite/79bccc27-27a0-4194-9c46-5e89b0f21b9e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite/1762652580.456884", - "retrieved_timestamp": "1762652580.456885", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6063511482865463 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6295900497885079 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37084592145015105 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48732291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5306682180851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite2-R1/6eeb591b-aed2-4cdd-85bb-75011c9c5760.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite2-R1/6eeb591b-aed2-4cdd-85bb-75011c9c5760.json deleted file mode 100644 index 7e6c626d7cacc45ae4e6012115b22d04036c6fc5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite2-R1/6eeb591b-aed2-4cdd-85bb-75011c9c5760.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite2-R1/1762652580.457828", - "retrieved_timestamp": "1762652580.4578292", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"prithivMLmods/Calcium-Opus-14B-Elite2-R1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite2-R1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6325793339450436 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6362357624539174 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48998958333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5247672872340425 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite2/689d38cd-898e-43ec-92e8-238cefac6776.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite2/689d38cd-898e-43ec-92e8-238cefac6776.json deleted file mode 100644 index 2012f845866f11ff99ee02830c2fed385fe4cba8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite2/689d38cd-898e-43ec-92e8-238cefac6776.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite2/1762652580.457599", - "retrieved_timestamp": "1762652580.4576", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite2", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6176168122803052 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": 
"Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6318256156619112 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4690332326283988 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49395833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5300864361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite3/2edb276e-86c5-4bde-a696-4f68fb659b4e.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite3/2edb276e-86c5-4bde-a696-4f68fb659b4e.json deleted file mode 100644 index afd20f4b9adf91ccf2b1151dc4fb5cae8d3ac7ca..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite3/2edb276e-86c5-4bde-a696-4f68fb659b4e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite3/1762652580.458055", - "retrieved_timestamp": "1762652580.458056", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite3", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5428285837134359 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6350402275340573 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4705438066465257 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37080536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4794791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5334940159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite4/380cd349-5309-40b8-b549-ac6d6d42331a.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite4/380cd349-5309-40b8-b549-ac6d6d42331a.json deleted file mode 100644 index 580e991a6ebc6e4b6a8dd497178a9dfb5fff5c9c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Elite4/380cd349-5309-40b8-b549-ac6d6d42331a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Elite4/1762652580.4582741", - "retrieved_timestamp": "1762652580.458275", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Elite4", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Elite4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6111971790405014 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6195264951573699 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36253776435045315 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35570469798657717 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46871875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.514876994680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Merge/6d4dfc45-b7ff-47a2-bcf0-f12641365cbf.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Merge/6d4dfc45-b7ff-47a2-bcf0-f12641365cbf.json deleted file mode 100644 index 27a6272f5a3ed9d295a46d34dbc426610f63f95b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-14B-Merge/6d4dfc45-b7ff-47a2-bcf0-f12641365cbf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-14B-Merge/1762652580.4585001", - "retrieved_timestamp": "1762652580.458503", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-14B-Merge", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-14B-Merge" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4949434168007554 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6319290054891645 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4637462235649547 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37080536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48608333333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5355718085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-20B-v1/9c414577-7f2d-487a-9f2b-7675e0532ac1.json 
b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-20B-v1/9c414577-7f2d-487a-9f2b-7675e0532ac1.json deleted file mode 100644 index 7fd29b0c5142f8c5d68a4a192197c0626d19a082..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Calcium-Opus-20B-v1/9c414577-7f2d-487a-9f2b-7675e0532ac1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Calcium-Opus-20B-v1/1762652580.458724", - "retrieved_timestamp": "1762652580.4587252", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Calcium-Opus-20B-v1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Calcium-Opus-20B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3092716215197897 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.599033246250772 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36178247734138974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35318791946308725 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49433333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4734042553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 19.173 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Codepy-Deepthink-3B/adb6f7d5-db2f-49b1-aab4-1fd3dfcb7e34.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Codepy-Deepthink-3B/adb6f7d5-db2f-49b1-aab4-1fd3dfcb7e34.json deleted file mode 100644 index 842d0a907ad3431d1a623e543150e8f83c1885cb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Codepy-Deepthink-3B/adb6f7d5-db2f-49b1-aab4-1fd3dfcb7e34.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Codepy-Deepthink-3B/1762652580.458943", - "retrieved_timestamp": "1762652580.458944", - 
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Codepy-Deepthink-3B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Codepy-Deepthink-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43271962836385236 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4259451388094382 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11555891238670694 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3310208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3090093085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Coma-II-14B/785e4cde-ec97-4e36-8ee3-3fb4c2543901.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Coma-II-14B/785e4cde-ec97-4e36-8ee3-3fb4c2543901.json deleted file mode 100644 index 184a0c16a95adc7dd20fe712c63f3ce5cde92ad3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Coma-II-14B/785e4cde-ec97-4e36-8ee3-3fb4c2543901.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Coma-II-14B/1762652580.4591591", - "retrieved_timestamp": "1762652580.45916", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Coma-II-14B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Coma-II-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.416832892281369 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6320713788922736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4001677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5351041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5039893617021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Condor-Opus-14B-Exp/7b9f72e6-0280-46ba-8645-ab8dcb9ddf4d.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Condor-Opus-14B-Exp/7b9f72e6-0280-46ba-8645-ab8dcb9ddf4d.json deleted file mode 100644 index 4f1655550258b8d16b4a9fa2bf7cf7c59a7064a7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Condor-Opus-14B-Exp/7b9f72e6-0280-46ba-8645-ab8dcb9ddf4d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Condor-Opus-14B-Exp/1762652580.4595032", - "retrieved_timestamp": "1762652580.4595041", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Condor-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Condor-Opus-14B-Exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40431831983581346 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6154220154262888 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5226586102719033 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39177852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5014128989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Cygnus-II-14B/120d9ddf-0e6e-4fb9-9250-019d1fbfdc28.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Cygnus-II-14B/120d9ddf-0e6e-4fb9-9250-019d1fbfdc28.json deleted file mode 100644 index 3f47cae713ca37b21b9f1b0b436923d27bad4b2b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Cygnus-II-14B/120d9ddf-0e6e-4fb9-9250-019d1fbfdc28.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Cygnus-II-14B/1762652580.4597278", - "retrieved_timestamp": "1762652580.459729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Cygnus-II-14B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Cygnus-II-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6184412913292286 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6660565208074918 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4395770392749245 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46884375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5390625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Deepthink-Reasoning-14B/343e0d36-5470-4865-aeeb-a9963b38f90a.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Deepthink-Reasoning-14B/343e0d36-5470-4865-aeeb-a9963b38f90a.json deleted file mode 100644 index a6d88179e06bd09a61efcf92095985c17cf21bf6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Deepthink-Reasoning-14B/343e0d36-5470-4865-aeeb-a9963b38f90a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Deepthink-Reasoning-14B/1762652580.460205", - "retrieved_timestamp": "1762652580.460206", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Deepthink-Reasoning-14B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Deepthink-Reasoning-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5423542866261519 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6334054936091441 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36661073825503354 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47315625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5295877659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Deepthink-Reasoning-7B/10d2454a-ae69-43b6-962a-77102645ed56.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Deepthink-Reasoning-7B/10d2454a-ae69-43b6-962a-77102645ed56.json deleted file mode 100644 index 9543a14d8b3512272ed692d6b240dcd005b20d6f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Deepthink-Reasoning-7B/10d2454a-ae69-43b6-962a-77102645ed56.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Deepthink-Reasoning-7B/1762652580.460416", - "retrieved_timestamp": "1762652580.460416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Deepthink-Reasoning-7B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Deepthink-Reasoning-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48400244684104843 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5505070216145282 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33459214501510576 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4432291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43492353723404253 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Dinobot-Opus-14B-Exp/6ed13eae-92ee-4fa7-9ed8-d9f21d6de48c.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Dinobot-Opus-14B-Exp/6ed13eae-92ee-4fa7-9ed8-d9f21d6de48c.json deleted file mode 100644 index 70699a51edcb052f6bb313b6b6214d25532676f0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Dinobot-Opus-14B-Exp/6ed13eae-92ee-4fa7-9ed8-d9f21d6de48c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Dinobot-Opus-14B-Exp/1762652580.460635", - "retrieved_timestamp": "1762652580.460635", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Dinobot-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Dinobot-Opus-14B-Exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8239958864701216 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6370093752306357 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4979222074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Elita-0.1-Distilled-R1-abliterated/9b63b3ad-568f-4f15-9cc6-36049ac89727.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Elita-0.1-Distilled-R1-abliterated/9b63b3ad-568f-4f15-9cc6-36049ac89727.json deleted file mode 100644 index 2e0cffaa266f9e9f0280fb04b29dfa9d017951b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Elita-0.1-Distilled-R1-abliterated/9b63b3ad-568f-4f15-9cc6-36049ac89727.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Elita-0.1-Distilled-R1-abliterated/1762652580.460851", - "retrieved_timestamp": "1762652580.460852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - 
"model_info": { - "name": "prithivMLmods/Elita-0.1-Distilled-R1-abliterated", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Elita-0.1-Distilled-R1-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35423454212600347 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38277850218543213 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3066465256797583 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36596875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2757646276595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Elita-1/d721cfe0-eb01-42fe-955a-bfd219c38917.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Elita-1/d721cfe0-eb01-42fe-955a-bfd219c38917.json deleted file mode 100644 index f95e5e1607b846ab8421d3d29cd90f87ce470f07..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Elita-1/d721cfe0-eb01-42fe-955a-bfd219c38917.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Elita-1/1762652580.4610822", - "retrieved_timestamp": "1762652580.4610822", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Elita-1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Elita-1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4906470387460826 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6520409113818334 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3429003021148036 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37583892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48341666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5381482712765957 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Epimetheus-14B-Axo/dc3aed7d-01e0-46cc-85f6-2a06cf6b6edc.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Epimetheus-14B-Axo/dc3aed7d-01e0-46cc-85f6-2a06cf6b6edc.json deleted file mode 100644 index c388384f06f79ad0a3c55ade6a7c4099b5e8062d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Epimetheus-14B-Axo/dc3aed7d-01e0-46cc-85f6-2a06cf6b6edc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Epimetheus-14B-Axo/1762652580.461361", - "retrieved_timestamp": "1762652580.461361", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Epimetheus-14B-Axo", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Epimetheus-14B-Axo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.554643900406477 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6613340892011862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41012084592145015 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4819583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5304188829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Equuleus-Opus-14B-Exp/ccce28fd-d3ae-427c-b848-f08b2cf85692.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Equuleus-Opus-14B-Exp/ccce28fd-d3ae-427c-b848-f08b2cf85692.json deleted file mode 100644 index 99636afad4be99db8987f41ed6ffa55c7a25f9db..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Equuleus-Opus-14B-Exp/ccce28fd-d3ae-427c-b848-f08b2cf85692.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Equuleus-Opus-14B-Exp/1762652580.46158", - "retrieved_timestamp": "1762652580.46158", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Equuleus-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Equuleus-Opus-14B-Exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7000735825387749 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6433769213927613 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45845921450151056 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4951666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5374002659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Eridanus-Opus-14B-r999/9dd4aa3f-98aa-4e51-bd21-c999b3990a64.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Eridanus-Opus-14B-r999/9dd4aa3f-98aa-4e51-bd21-c999b3990a64.json deleted file mode 100644 index ac87985be8732e4695f803ebb9aafd4003a60792..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Eridanus-Opus-14B-r999/9dd4aa3f-98aa-4e51-bd21-c999b3990a64.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Eridanus-Opus-14B-r999/1762652580.461785", - "retrieved_timestamp": "1762652580.461786", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Eridanus-Opus-14B-r999", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Eridanus-Opus-14B-r999" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.638574537781974 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6583918169279829 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859516616314199 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.476875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5361535904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Evac-Opus-14B-Exp/26c88cb2-7c81-4b0c-8493-baa9d8f7b1a0.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Evac-Opus-14B-Exp/26c88cb2-7c81-4b0c-8493-baa9d8f7b1a0.json deleted file mode 100644 index 
f5c68d9ed5759039ce5ba73324c4b79fa0926e79..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Evac-Opus-14B-Exp/26c88cb2-7c81-4b0c-8493-baa9d8f7b1a0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Evac-Opus-14B-Exp/1762652580.461996", - "retrieved_timestamp": "1762652580.461997", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Evac-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Evac-Opus-14B-Exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5916135852870383 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6475440673701862 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47278125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5316655585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_FastThink-0.5B-Tiny/b731eb88-e0ed-4edb-bed3-2d82bbce43bb.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_FastThink-0.5B-Tiny/b731eb88-e0ed-4edb-bed3-2d82bbce43bb.json deleted file mode 100644 index 4eafc23d24fae6322b866bb86b75c3997b9deaf6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_FastThink-0.5B-Tiny/b731eb88-e0ed-4edb-bed3-2d82bbce43bb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_FastThink-0.5B-Tiny/1762652580.462207", - "retrieved_timestamp": "1762652580.462208", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open 
LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/FastThink-0.5B-Tiny", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/FastThink-0.5B-Tiny" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25798880304259364 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3205583807088257 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3566354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16489361702127658 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 0.494 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_GWQ-9B-Preview/7735d88c-bdaa-4a12-9a99-a2dc5ec2ec66.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_GWQ-9B-Preview/7735d88c-bdaa-4a12-9a99-a2dc5ec2ec66.json deleted file mode 100644 index bc9c36796464a9e78eabf8ea5d8ddfd985760005..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_GWQ-9B-Preview/7735d88c-bdaa-4a12-9a99-a2dc5ec2ec66.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_GWQ-9B-Preview/1762652580.4624221", - "retrieved_timestamp": "1762652580.462423", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/GWQ-9B-Preview", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/GWQ-9B-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5065836425129767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5805745804247511 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22658610271903323 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4951041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39835438829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_GWQ-9B-Preview2/5c534761-19b5-4111-b1f5-c2fc3e121b24.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_GWQ-9B-Preview2/5c534761-19b5-4111-b1f5-c2fc3e121b24.json deleted file mode 100644 index 200bed56a1d24163a6d379dab6d3326950a56a1f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_GWQ-9B-Preview2/5c534761-19b5-4111-b1f5-c2fc3e121b24.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_GWQ-9B-Preview2/1762652580.462637", - "retrieved_timestamp": "1762652580.4626381", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/GWQ-9B-Preview2", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/GWQ-9B-Preview2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5208967761096114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5797218710843371 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.23716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48598958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3996841755319149 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_GWQ2b/8a89468f-fe2f-4bc9-be99-c9619c605efc.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_GWQ2b/8a89468f-fe2f-4bc9-be99-c9619c605efc.json deleted file mode 100644 index 3b39f82dea711c182cf415e6e98d9ee28aaf981a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_GWQ2b/8a89468f-fe2f-4bc9-be99-c9619c605efc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_GWQ2b/1762652580.462852", - "retrieved_timestamp": "1762652580.4628532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/GWQ2b", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/GWQ2b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41148707651254224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41433702954085216 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43111458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24725731382978725 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Gaea-Opus-14B-Exp/f75e27a8-00e8-4473-b7ed-3fffa131ee0a.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Gaea-Opus-14B-Exp/f75e27a8-00e8-4473-b7ed-3fffa131ee0a.json deleted file mode 100644 index d0376ff0e99b0163bbda841bfb0c704d3b0e22c2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Gaea-Opus-14B-Exp/f75e27a8-00e8-4473-b7ed-3fffa131ee0a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Gaea-Opus-14B-Exp/1762652580.463063", - "retrieved_timestamp": "1762652580.463063", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Gaea-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Gaea-Opus-14B-Exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5956351369920699 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6560465337491567 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48589583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5400598404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Gauss-Opus-14B-R999/e8596a17-9e5d-4ac5-9968-44d302628c31.json 
b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Gauss-Opus-14B-R999/e8596a17-9e5d-4ac5-9968-44d302628c31.json deleted file mode 100644 index d7e473e6a5bf3ee91ff8e302be10cadb355a6c20..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Gauss-Opus-14B-R999/e8596a17-9e5d-4ac5-9968-44d302628c31.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Gauss-Opus-14B-R999/1762652580.463757", - "retrieved_timestamp": "1762652580.463758", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Gauss-Opus-14B-R999", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Gauss-Opus-14B-R999" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39065457430728245 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6227831608555382 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39177852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5338333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.500748005319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Jolt-v0.1/d96ef95b-ca39-4e33-9f6b-a4faa71e5009.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Jolt-v0.1/d96ef95b-ca39-4e33-9f6b-a4faa71e5009.json deleted file mode 100644 index 2afc4b2ca3fd498b3302acdca2ca099b93e25df6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Jolt-v0.1/d96ef95b-ca39-4e33-9f6b-a4faa71e5009.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Jolt-v0.1/1762652580.463978", - "retrieved_timestamp": "1762652580.463979", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Jolt-v0.1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Jolt-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5092066827129793 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6521408461659391 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3564954682779456 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48471875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5386469414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Lacerta-Opus-14B-Elite8/21b53896-3b7b-470a-a49f-4b2cb4e6adef.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Lacerta-Opus-14B-Elite8/21b53896-3b7b-470a-a49f-4b2cb4e6adef.json deleted file mode 100644 index 3ecaf140517b6ed016f0cc418aeeebea4b565712..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Lacerta-Opus-14B-Elite8/21b53896-3b7b-470a-a49f-4b2cb4e6adef.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Lacerta-Opus-14B-Elite8/1762652580.464193", - "retrieved_timestamp": "1762652580.464193", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Lacerta-Opus-14B-Elite8", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Lacerta-Opus-14B-Elite8" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.614144913274556 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6401384743047456 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4635416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5321642287234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Llama-3.1-5B-Instruct/cdc5671a-e164-43b9-864c-808a9464e618.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Llama-3.1-5B-Instruct/cdc5671a-e164-43b9-864c-808a9464e618.json deleted file mode 100644 index d9bf5770178ca9b16317e710990d107ffafce5b5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Llama-3.1-5B-Instruct/cdc5671a-e164-43b9-864c-808a9464e618.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.1-5B-Instruct/1762652580.464407", - "retrieved_timestamp": "1762652580.4644082", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Llama-3.1-5B-Instruct", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Llama-3.1-5B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14066011516110588 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3051074188361172 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015105740181268883 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35400000000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11835106382978723 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 5.413 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Llama-3.1-8B-Open-SFT/37276848-95fe-4403-896d-bf9fafbff04e.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Llama-3.1-8B-Open-SFT/37276848-95fe-4403-896d-bf9fafbff04e.json deleted file mode 100644 index fd9ed6eb96c5505610a1ec2b5110799e73b21aea..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Llama-3.1-8B-Open-SFT/37276848-95fe-4403-896d-bf9fafbff04e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Llama-3.1-8B-Open-SFT/1762652580.464622", - "retrieved_timestamp": "1762652580.4646232", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Llama-3.1-8B-Open-SFT", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Llama-3.1-8B-Open-SFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4122616878770551 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4967982234773378 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1216012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": 
{ - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39036458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35222739361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_LwQ-10B-Instruct/df470b21-0d55-4d28-af25-75908799a0cc.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_LwQ-10B-Instruct/df470b21-0d55-4d28-af25-75908799a0cc.json deleted file mode 100644 index eb7a36f50bca4aef84218ec32048b48e41614baa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_LwQ-10B-Instruct/df470b21-0d55-4d28-af25-75908799a0cc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_LwQ-10B-Instruct/1762652580.4662411", - "retrieved_timestamp": "1762652580.466242", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/LwQ-10B-Instruct", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/LwQ-10B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934770852449279 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5121712029712329 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45439583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.331781914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - 
"params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_LwQ-Reasoner-10B/d22507ab-2601-4bf0-a8d8-b456102c85af.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_LwQ-Reasoner-10B/d22507ab-2601-4bf0-a8d8-b456102c85af.json deleted file mode 100644 index 0e343e47cee818633c3799c3696db2143d54d161..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_LwQ-Reasoner-10B/d22507ab-2601-4bf0-a8d8-b456102c85af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_LwQ-Reasoner-10B/1762652580.466471", - "retrieved_timestamp": "1762652580.466471", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/LwQ-Reasoner-10B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/LwQ-Reasoner-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29413400887423147 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5866254169962443 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3580060422960725 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40785416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41472739361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Magellanic-Opus-14B-Exp/07236482-8709-4aa8-8e63-762b2f591b2a.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Magellanic-Opus-14B-Exp/07236482-8709-4aa8-8e63-762b2f591b2a.json deleted file mode 100644 index 0950d8ec94765cae45316a5f8b8ee84958db1931..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Magellanic-Opus-14B-Exp/07236482-8709-4aa8-8e63-762b2f591b2a.json +++ /dev/null 
@@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Magellanic-Opus-14B-Exp/1762652580.466739", - "retrieved_timestamp": "1762652580.466739", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Magellanic-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Magellanic-Opus-14B-Exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6866347956754744 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6382505935140227 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37990936555891236 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49262500000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5272606382978723 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Corpus-14B-Exp.v2/f50a6538-057e-4e57-af79-ba3a5b7121cb.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Corpus-14B-Exp.v2/f50a6538-057e-4e57-af79-ba3a5b7121cb.json deleted file mode 100644 index 9a7088383051b944ddcd094960d791a6ed778541..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Corpus-14B-Exp.v2/f50a6538-057e-4e57-af79-ba3a5b7121cb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Corpus-14B-Exp.v2/1762652580.467396", - "retrieved_timestamp": "1762652580.4673972", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "prithivMLmods/Megatron-Corpus-14B-Exp.v2", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Megatron-Corpus-14B-Exp.v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48704991644392437 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.632146083740281 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2590634441087613 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.449 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48096742021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Corpus-14B-Exp/f71c4189-288e-4c6d-978c-d793ca57fedf.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Corpus-14B-Exp/f71c4189-288e-4c6d-978c-d793ca57fedf.json deleted file mode 100644 index 855553925a04d0583fca1db793f84e285251ba81..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Corpus-14B-Exp/f71c4189-288e-4c6d-978c-d793ca57fedf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Corpus-14B-Exp/1762652580.46718", - "retrieved_timestamp": "1762652580.46718", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Megatron-Corpus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Megatron-Corpus-14B-Exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49826571275327247 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6355171004470184 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3429003021148036 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4766875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5260139627659575 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Opus-14B-2.0/c6dd1b78-b487-4197-8a66-c364487ff6fb.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Opus-14B-2.0/c6dd1b78-b487-4197-8a66-c364487ff6fb.json deleted file mode 100644 index c3d93f0055e6d8cb633bb9ed0767af714f49dc3c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Opus-14B-2.0/c6dd1b78-b487-4197-8a66-c364487ff6fb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-2.0/1762652580.467613", - "retrieved_timestamp": "1762652580.467613", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Megatron-Opus-14B-2.0", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Megatron-Opus-14B-2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6693739278447852 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6870557211788685 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27794561933534745 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5170378989361702 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Opus-14B-2.1/002ba3ef-6ac7-4bdf-bd7d-42ef16aa7cc9.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Opus-14B-2.1/002ba3ef-6ac7-4bdf-bd7d-42ef16aa7cc9.json deleted file mode 100644 index faf034046a480f139d947e52e5b65a36ef445607..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Opus-14B-2.1/002ba3ef-6ac7-4bdf-bd7d-42ef16aa7cc9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-2.1/1762652580.4678242", - "retrieved_timestamp": "1762652580.467825", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Megatron-Opus-14B-2.1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Megatron-Opus-14B-2.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02455484780382718 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6726960005125086 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2998489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49275 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173703457446809 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 14.66 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Opus-14B-Exp/ac65fabb-07d5-457d-844e-19aecf2b18e0.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Opus-14B-Exp/ac65fabb-07d5-457d-844e-19aecf2b18e0.json deleted file mode 100644 index 6926b329f07b44a4a197c7deaaf5566c08ddda23..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Opus-14B-Exp/ac65fabb-07d5-457d-844e-19aecf2b18e0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-Exp/1762652580.46803", - "retrieved_timestamp": "1762652580.468031", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Megatron-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Megatron-Opus-14B-Exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4979410187192206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6516090109599467 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48865625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5400598404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Opus-14B-Stock/8a0828ef-56a0-4c2b-bc61-f955c56b7700.json 
b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Opus-14B-Stock/8a0828ef-56a0-4c2b-bc61-f955c56b7700.json deleted file mode 100644 index 0a23a8ab0c74c56a2b11aab0b54e1261a4b6d2c6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Opus-14B-Stock/8a0828ef-56a0-4c2b-bc61-f955c56b7700.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-14B-Stock/1762652580.468238", - "retrieved_timestamp": "1762652580.468238", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Megatron-Opus-14B-Stock", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Megatron-Opus-14B-Stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5173750094194515 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6411753580495262 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33459214501510576 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.375 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48202083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5293384308510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Opus-7B-Exp/94536d01-2de8-4305-83aa-2673a226ab64.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Opus-7B-Exp/94536d01-2de8-4305-83aa-2673a226ab64.json deleted file mode 100644 index 1653a0695927cdb7f4a73df5293019b53f54fcbe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Megatron-Opus-7B-Exp/94536d01-2de8-4305-83aa-2673a226ab64.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Megatron-Opus-7B-Exp/1762652580.468447", - "retrieved_timestamp": 
"1762652580.468448", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Megatron-Opus-7B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Megatron-Opus-7B-Exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6017300761978217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367154102661396 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1971299093655589 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3900432180851064 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Messier-Opus-14B-Elite7/e2ac8e52-8326-496a-b904-ca0e48190b3b.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Messier-Opus-14B-Elite7/e2ac8e52-8326-496a-b904-ca0e48190b3b.json deleted file mode 100644 index 2e3d9232bfeadc2d84dfcde2a95c9e622f7aafa5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Messier-Opus-14B-Elite7/e2ac8e52-8326-496a-b904-ca0e48190b3b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Messier-Opus-14B-Elite7/1762652580.4686568", - "retrieved_timestamp": "1762652580.468658", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Messier-Opus-14B-Elite7", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": 
"prithivMLmods/Messier-Opus-14B-Elite7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7113392465325337 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6498611961862557 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4070996978851964 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4885625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5403922872340425 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Omni-Reasoner-Merged/8043bcfd-1a4c-45c5-aca4-f23f02bd5562.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Omni-Reasoner-Merged/8043bcfd-1a4c-45c5-aca4-f23f02bd5562.json deleted file mode 100644 index de26d865bf838a19308489fe723db133b9d25a0e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Omni-Reasoner-Merged/8043bcfd-1a4c-45c5-aca4-f23f02bd5562.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Omni-Reasoner-Merged/1762652580.468864", - "retrieved_timestamp": "1762652580.468864", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Omni-Reasoner-Merged", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Omni-Reasoner-Merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4599473840520929 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.5507848245879011 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3330815709969788 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4616458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43641954787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Omni-Reasoner3-Merged/972cdfdc-1c7f-4900-8acf-d5eed0ccc968.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Omni-Reasoner3-Merged/972cdfdc-1c7f-4900-8acf-d5eed0ccc968.json deleted file mode 100644 index 9a5bf5ddd2f7ef63b5ef9af1dd5be0667c26beaa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Omni-Reasoner3-Merged/972cdfdc-1c7f-4900-8acf-d5eed0ccc968.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Omni-Reasoner3-Merged/1762652580.46908", - "retrieved_timestamp": "1762652580.4690812", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Omni-Reasoner3-Merged", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Omni-Reasoner3-Merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.493469549683728 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387847138827546 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10876132930513595 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35222916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29496343085106386 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Pegasus-Opus-14B-Exp/5cc40900-fe74-469a-99c0-74e998b0e316.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Pegasus-Opus-14B-Exp/5cc40900-fe74-469a-99c0-74e998b0e316.json deleted file mode 100644 index 15c14033ec6bb28201a2596497be64192fe60af8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Pegasus-Opus-14B-Exp/5cc40900-fe74-469a-99c0-74e998b0e316.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Pegasus-Opus-14B-Exp/1762652580.469298", - "retrieved_timestamp": "1762652580.4692988", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Pegasus-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Pegasus-Opus-14B-Exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6981752860188744 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6547548394062034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4086102719033233 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4859583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5412234042553191 - } - } 
- ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Porpoise-Opus-14B-Exp/79832ae5-0a80-4e46-8175-4baa240dc4d9.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Porpoise-Opus-14B-Exp/79832ae5-0a80-4e46-8175-4baa240dc4d9.json deleted file mode 100644 index 1ab31f3673dd641affc5040b2925362e3105b0aa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Porpoise-Opus-14B-Exp/79832ae5-0a80-4e46-8175-4baa240dc4d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Porpoise-Opus-14B-Exp/1762652580.47141", - "retrieved_timestamp": "1762652580.471411", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Porpoise-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Porpoise-Opus-14B-Exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7098155117310957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6518903547146537 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4925625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5396442819148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Primal-Opus-14B-Optimus-v1/94c21b1f-ce8d-4488-a1d1-2769d34f29ec.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Primal-Opus-14B-Optimus-v1/94c21b1f-ce8d-4488-a1d1-2769d34f29ec.json deleted file mode 100644 index d5019e01548060637e73468d8577e806560b15ae..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Primal-Opus-14B-Optimus-v1/94c21b1f-ce8d-4488-a1d1-2769d34f29ec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Primal-Opus-14B-Optimus-v1/1762652580.4716318", - "retrieved_timestamp": "1762652580.471633", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Primal-Opus-14B-Optimus-v1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Primal-Opus-14B-Optimus-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5013131823561483 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6419423743359406 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.338368580060423 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724832214765101 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48471875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5259308510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Primal-Opus-14B-Optimus-v2/80407172-765a-4aa9-b189-a322150b1a7b.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Primal-Opus-14B-Optimus-v2/80407172-765a-4aa9-b189-a322150b1a7b.json deleted file mode 100644 index 41cda8097a253f5e8ba327b6395a7ab063c64152..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Primal-Opus-14B-Optimus-v2/80407172-765a-4aa9-b189-a322150b1a7b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Primal-Opus-14B-Optimus-v2/1762652580.471854", - "retrieved_timestamp": "1762652580.471854", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Primal-Opus-14B-Optimus-v2", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Primal-Opus-14B-Optimus-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6403730989330532 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6543780845512958 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206948640483384 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39177852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48998958333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.542220744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-LCoT-14B-Conversational/71114773-e285-4666-ae7f-5fd7c9084104.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-LCoT-14B-Conversational/71114773-e285-4666-ae7f-5fd7c9084104.json deleted file mode 100644 index c94ffd02c0f7cc676abb8dceef2682bb10f1d3bb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-LCoT-14B-Conversational/71114773-e285-4666-ae7f-5fd7c9084104.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT-14B-Conversational/1762652580.472128", - "retrieved_timestamp": "1762652580.472129", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/QwQ-LCoT-14B-Conversational", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/QwQ-LCoT-14B-Conversational" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4047427492386867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6239828933798323 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3498322147651007 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48471875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.527842420212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-LCoT-3B-Instruct/87fc8696-17f1-4a86-8d0d-f5b124144384.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-LCoT-3B-Instruct/87fc8696-17f1-4a86-8d0d-f5b124144384.json deleted file mode 100644 index 57f017db84ba12aed377b6331918464d7d99a234..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-LCoT-3B-Instruct/87fc8696-17f1-4a86-8d0d-f5b124144384.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT-3B-Instruct/1762652580.47235", - "retrieved_timestamp": "1762652580.472351", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/QwQ-LCoT-3B-Instruct", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/QwQ-LCoT-3B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4354424039326764 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47629783868435643 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2824773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43579166666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3582114361702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-LCoT-7B-Instruct/23f056f6-67dd-41fd-b1af-a1cf9abf784c.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-LCoT-7B-Instruct/23f056f6-67dd-41fd-b1af-a1cf9abf784c.json deleted file mode 100644 index a45b5fea3bc0b08de2c8ce4d856551dde2ea30d9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-LCoT-7B-Instruct/23f056f6-67dd-41fd-b1af-a1cf9abf784c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT-7B-Instruct/1762652580.4725702", - "retrieved_timestamp": "1762652580.472571", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/QwQ-LCoT-7B-Instruct", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/QwQ-LCoT-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4986901421561457 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5466466326018563 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.4801875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4334275265957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-LCoT1-Merged/34aec318-6db4-4df6-9d6a-ad15e353f36a.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-LCoT1-Merged/34aec318-6db4-4df6-9d6a-ad15e353f36a.json deleted file mode 100644 index ea824d99c638633ca999eae3ada186a15eef3826..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-LCoT1-Merged/34aec318-6db4-4df6-9d6a-ad15e353f36a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT1-Merged/1762652580.47278", - "retrieved_timestamp": "1762652580.472781", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/QwQ-LCoT1-Merged", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/QwQ-LCoT1-Merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47513486438206187 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.548095531408024 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3731117824773414 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46961458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4357546542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-LCoT2-7B-Instruct/8c05d496-c21f-4a70-b312-1c1ba37d877a.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-LCoT2-7B-Instruct/8c05d496-c21f-4a70-b312-1c1ba37d877a.json deleted file mode 100644 index 9d34d70f1bf11ec2b5dc887b4478cfff6ed191e7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-LCoT2-7B-Instruct/8c05d496-c21f-4a70-b312-1c1ba37d877a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-LCoT2-7B-Instruct/1762652580.473001", - "retrieved_timestamp": "1762652580.473002", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/QwQ-LCoT2-7B-Instruct", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/QwQ-LCoT2-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5561177675235043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5424862934133593 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3270392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4564375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4341755319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-MathOct-7B/e703fed7-cf06-4caa-b78f-3e398b437671.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-MathOct-7B/e703fed7-cf06-4caa-b78f-3e398b437671.json deleted file mode 100644 index f9337fd6bf2f3aaa26404c37b22cf1368bf6b9d8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-MathOct-7B/e703fed7-cf06-4caa-b78f-3e398b437671.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/prithivMLmods_QwQ-MathOct-7B/1762652580.473228", - "retrieved_timestamp": "1762652580.4732292", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/QwQ-MathOct-7B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/QwQ-MathOct-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4684404047926169 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5485512215016556 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29531722054380666 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4600625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4330119680851064 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-R1-Distill-1.5B-CoT/8dd67de7-0d3b-4359-b390-d90c609dea5a.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-R1-Distill-1.5B-CoT/8dd67de7-0d3b-4359-b390-d90c609dea5a.json deleted file mode 100644 index 88b0a1bf37a0688c98c26bb02c49176a9614bb4a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-R1-Distill-1.5B-CoT/8dd67de7-0d3b-4359-b390-d90c609dea5a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-R1-Distill-1.5B-CoT/1762652580.4734771", - "retrieved_timestamp": "1762652580.473483", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/QwQ-R1-Distill-1.5B-CoT", - "developer": "prithivMLmods", - 
"inference_platform": "unknown", - "id": "prithivMLmods/QwQ-R1-Distill-1.5B-CoT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21939564799177294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36662076641982305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33459214501510576 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34339583333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19132313829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-R1-Distill-7B-CoT/a723f173-af0e-4172-a43c-278ccbacac18.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-R1-Distill-7B-CoT/a723f173-af0e-4172-a43c-278ccbacac18.json deleted file mode 100644 index 38c9999c2cd2634dae589c8d8e097663acf1ef1a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_QwQ-R1-Distill-7B-CoT/a723f173-af0e-4172-a43c-278ccbacac18.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_QwQ-R1-Distill-7B-CoT/1762652580.473804", - "retrieved_timestamp": "1762652580.473805", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/QwQ-R1-Distill-7B-CoT", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/QwQ-R1-Distill-7B-CoT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3500378994401522 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.438788672517715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37790624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2804188829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Qwen2.5-1.5B-DeepSeek-R1-Instruct/b1430f51-cd48-4feb-8d94-c2a9a60f00bc.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Qwen2.5-1.5B-DeepSeek-R1-Instruct/b1430f51-cd48-4feb-8d94-c2a9a60f00bc.json deleted file mode 100644 index b589297ebb1601c9627b610a5f4af59090859fe7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Qwen2.5-1.5B-DeepSeek-R1-Instruct/b1430f51-cd48-4feb-8d94-c2a9a60f00bc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen2.5-1.5B-DeepSeek-R1-Instruct/1762652580.474298", - "retrieved_timestamp": "1762652580.474299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13968603305895025 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28243669901671337 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, 
- "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11228390957446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_SmolLM2-CoT-360M/8ce4dea8-d674-4b95-b025-0c6ab60f6544.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_SmolLM2-CoT-360M/8ce4dea8-d674-4b95-b025-0c6ab60f6544.json deleted file mode 100644 index cbfaa55f949b92ea0326f8970547ab41c47b62c6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_SmolLM2-CoT-360M/8ce4dea8-d674-4b95-b025-0c6ab60f6544.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_SmolLM2-CoT-360M/1762652580.475137", - "retrieved_timestamp": "1762652580.475137", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/SmolLM2-CoT-360M", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/SmolLM2-CoT-360M" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22156877086131466 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31352960121180296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02039274924471299 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23657718120805368 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.1085438829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Elite5/3b12518e-ef16-4a72-89bb-071802ca636c.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Elite5/3b12518e-ef16-4a72-89bb-071802ca636c.json deleted file mode 100644 index 98d87c87ad6e573e4c6a88eedd57098868b1a957..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Elite5/3b12518e-ef16-4a72-89bb-071802ca636c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Elite5/1762652580.4753642", - "retrieved_timestamp": "1762652580.4753652", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Sombrero-Opus-14B-Elite5", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Sombrero-Opus-14B-Elite5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7880756393037142 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6501539892126272 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5354984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33640939597315433 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4286666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.520029920212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Elite6/0d354980-9f24-4b79-afb7-a7e6f52e8131.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Elite6/0d354980-9f24-4b79-afb7-a7e6f52e8131.json deleted file mode 100644 index 
5f58df3eaca30ed9f84da452c2fd59e336304bdf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Elite6/0d354980-9f24-4b79-afb7-a7e6f52e8131.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Elite6/1762652580.47572", - "retrieved_timestamp": "1762652580.475722", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Sombrero-Opus-14B-Elite6", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Sombrero-Opus-14B-Elite6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7226049105262924 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6487937804559186 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48859375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5389793882978723 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Sm1/5ce1b22c-7daa-4714-a774-d7d509fa869f.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Sm1/5ce1b22c-7daa-4714-a774-d7d509fa869f.json deleted file mode 100644 index 5ddefbcf82a0eb2f7f4591f541cd2adbb9cdda40..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Sm1/5ce1b22c-7daa-4714-a774-d7d509fa869f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm1/1762652580.476064", - "retrieved_timestamp": "1762652580.476065", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Sombrero-Opus-14B-Sm1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Sombrero-Opus-14B-Sm1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3812872068334242 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.635462046379832 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4035234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5298958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.512466755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Sm2/6a1519e9-062b-454f-97cb-e57454f74e9a.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Sm2/6a1519e9-062b-454f-97cb-e57454f74e9a.json deleted file mode 100644 index 562e8122dd58dfbcee3421796d50c0ff655ef4a8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Sm2/6a1519e9-062b-454f-97cb-e57454f74e9a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm2/1762652580.476301", - "retrieved_timestamp": "1762652580.4763021", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Sombrero-Opus-14B-Sm2", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Sombrero-Opus-14B-Sm2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4272242095417935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6609367219259568 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.486404833836858 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5088125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5344913563829787 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Sm4/79a8057c-0791-42d6-adef-924a9cff0917.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Sm4/79a8057c-0791-42d6-adef-924a9cff0917.json deleted file mode 100644 index 3982bb4cf2f35f58ca5c359115561cfeeb53f8de..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Sm4/79a8057c-0791-42d6-adef-924a9cff0917.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm4/1762652580.476516", - "retrieved_timestamp": "1762652580.4765172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Sombrero-Opus-14B-Sm4", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Sombrero-Opus-14B-Sm4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4346932804957513 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6612776404137711 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4879154078549849 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5191666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5300033244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Sm5/41acaa59-3232-4c6c-be64-0acb38019405.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Sm5/41acaa59-3232-4c6c-be64-0acb38019405.json deleted file mode 100644 index 23196e8a6d544f246eb4e569a02c39ad1012cc25..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sombrero-Opus-14B-Sm5/41acaa59-3232-4c6c-be64-0acb38019405.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Sombrero-Opus-14B-Sm5/1762652580.476726", - "retrieved_timestamp": "1762652580.476726", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Sombrero-Opus-14B-Sm5", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Sombrero-Opus-14B-Sm5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6851609285584471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6563944936055776 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4093655589123867 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, 
- "max_score": 1 - }, - "score_details": { - "score": 0.480625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399767287234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sqweeks-7B-Instruct/e0eaf433-d842-47c2-b47f-9e0ddd95df72.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sqweeks-7B-Instruct/e0eaf433-d842-47c2-b47f-9e0ddd95df72.json deleted file mode 100644 index 289e319186f227e19003fb3bd7d1f1b8c9a48609..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Sqweeks-7B-Instruct/e0eaf433-d842-47c2-b47f-9e0ddd95df72.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Sqweeks-7B-Instruct/1762652580.476933", - "retrieved_timestamp": "1762652580.476934", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Sqweeks-7B-Instruct", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Sqweeks-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21579852568961466 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4666692459456812 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5143504531722054 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44760416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3133311170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Tadpole-Opus-14B-Exp/0faf87d0-2b35-4256-acd9-4fe57f574d06.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Tadpole-Opus-14B-Exp/0faf87d0-2b35-4256-acd9-4fe57f574d06.json deleted file mode 100644 index dbf13854dc8fecc8c1fe7a09be3f4f7e998fd4d6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Tadpole-Opus-14B-Exp/0faf87d0-2b35-4256-acd9-4fe57f574d06.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Tadpole-Opus-14B-Exp/1762652580.477141", - "retrieved_timestamp": "1762652580.477142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Tadpole-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Tadpole-Opus-14B-Exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5749522378400422 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.636858708544215 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31344410876132933 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47284375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5322473404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Taurus-Opus-7B/01448351-5f76-4329-9bfd-4124e29de920.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Taurus-Opus-7B/01448351-5f76-4329-9bfd-4124e29de920.json deleted file mode 100644 index 176e9867c115b0d1b66a18519a4b94e9dea945fc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Taurus-Opus-7B/01448351-5f76-4329-9bfd-4124e29de920.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/prithivMLmods_Taurus-Opus-7B/1762652580.477352", - "retrieved_timestamp": "1762652580.4773529", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Taurus-Opus-7B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Taurus-Opus-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42232831110342783 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5367364587851736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21676737160120846 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3263422818791946 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43988541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3951130319148936 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Triangulum-10B/ee5ad026-8df4-41c0-9158-3759d4a3ef02.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Triangulum-10B/ee5ad026-8df4-41c0-9158-3759d4a3ef02.json deleted file mode 100644 index e35838260dfcfa1f1ae0f4faab0ededdb3444da9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Triangulum-10B/ee5ad026-8df4-41c0-9158-3759d4a3ef02.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Triangulum-10B/1762652580.477568", - "retrieved_timestamp": "1762652580.477569", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Triangulum-10B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": 
"prithivMLmods/Triangulum-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3229353670483207 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5968023910391113 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3549848942598187 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41724999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4178025265957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Triangulum-5B/7d8850c3-61b2-41c3-a01b-8e23511558f6.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Triangulum-5B/7d8850c3-61b2-41c3-a01b-8e23511558f6.json deleted file mode 100644 index a621fa3974d955ad48e18da053eaed6d4c36e475..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Triangulum-5B/7d8850c3-61b2-41c3-a01b-8e23511558f6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Triangulum-5B/1762652580.477782", - "retrieved_timestamp": "1762652580.477782", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Triangulum-5B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Triangulum-5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283206336963701 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3124115848614622 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3445416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12234042553191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 5.413 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Triangulum-v2-10B/00f8547d-4bb9-4510-a29c-c37376c274c8.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Triangulum-v2-10B/00f8547d-4bb9-4510-a29c-c37376c274c8.json deleted file mode 100644 index 5b10da890241c6e5c45719a51d6ab3ee4d19f326..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Triangulum-v2-10B/00f8547d-4bb9-4510-a29c-c37376c274c8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Triangulum-v2-10B/1762652580.478046", - "retrieved_timestamp": "1762652580.478047", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Triangulum-v2-10B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Triangulum-v2-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6705231009277606 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6064531367418446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24471299093655588 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42807291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44664228723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Tucana-Opus-14B-r999/f24694aa-cfe7-4a58-9f9e-f02c3e51d198.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Tucana-Opus-14B-r999/f24694aa-cfe7-4a58-9f9e-f02c3e51d198.json deleted file mode 100644 index d7e9c82f47eea9df454bad69a8cbdc7cdd84682d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Tucana-Opus-14B-r999/f24694aa-cfe7-4a58-9f9e-f02c3e51d198.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Tucana-Opus-14B-r999/1762652580.47826", - "retrieved_timestamp": "1762652580.478261", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Tucana-Opus-14B-r999", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Tucana-Opus-14B-r999" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.606725710005009 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6556888858891955 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39177852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47303125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5383976063829787 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Tulu-MathLingo-8B/fa0776bd-e95e-4d54-9004-82dff09307b8.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Tulu-MathLingo-8B/fa0776bd-e95e-4d54-9004-82dff09307b8.json deleted file mode 100644 index 38f3b857db34f80d951743044fb244f326876f5a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Tulu-MathLingo-8B/fa0776bd-e95e-4d54-9004-82dff09307b8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Tulu-MathLingo-8B/1762652580.478472", - "retrieved_timestamp": "1762652580.478473", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Tulu-MathLingo-8B", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Tulu-MathLingo-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5589402784611497 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4658807905856453 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14501510574018128 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38642708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.304438164893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-7B-Elite14/06bc6426-310b-40ac-bbeb-0460215b8981.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-7B-Elite14/06bc6426-310b-40ac-bbeb-0460215b8981.json deleted file mode 100644 index bb73794721e4e4a93a50df215971b999810f8441..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-7B-Elite14/06bc6426-310b-40ac-bbeb-0460215b8981.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-7B-Elite14/1762652580.4786801", - "retrieved_timestamp": "1762652580.478681", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-7B-Elite14", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-Coder-7B-Elite14" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14882844186757802 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28285388717732607 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10887632978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-Hybrid-v1.2/1f235238-05e0-4c76-b136-0bf0cf470ba2.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-Hybrid-v1.2/1f235238-05e0-4c76-b136-0bf0cf470ba2.json deleted file mode 100644 index f2b0ebca61f3d3ea6666c816c741aa8c332bbd30..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-Hybrid-v1.2/1f235238-05e0-4c76-b136-0bf0cf470ba2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-Hybrid-v1.2/1762652580.4788852", - "retrieved_timestamp": "1762652580.478886", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-Hybrid-v1.2", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-Coder-Hybrid-v1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6735705705306365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6390749226915919 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3330815709969788 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48217708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5242686170212766 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-Hybrid-v1.3/17167e2a-1f42-4ea9-a947-8749259738a8.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-Hybrid-v1.3/17167e2a-1f42-4ea9-a947-8749259738a8.json deleted file mode 100644 index 76f1526454ef003a18f4794af9ef1029d5b09877..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-Hybrid-v1.3/17167e2a-1f42-4ea9-a947-8749259738a8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-Hybrid-v1.3/1762652580.4790971", - "retrieved_timestamp": "1762652580.479098", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-Hybrid-v1.3", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-Coder-Hybrid-v1.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.7554776880898239 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6470999423290662 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33808724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4403229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5097240691489362 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-HybridMini-v1.3/1ca04810-a377-4390-944a-1a4ec91a7962.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-HybridMini-v1.3/1ca04810-a377-4390-944a-1a4ec91a7962.json deleted file mode 100644 index 1950619abd9afe7a98a90f84cf4ee7f4af80dc1e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-HybridMini-v1.3/1ca04810-a377-4390-944a-1a4ec91a7962.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-HybridMini-v1.3/1762652580.4793081", - "retrieved_timestamp": "1762652580.479309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-HybridMini-v1.3", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-Coder-HybridMini-v1.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.610372699991578 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5365472959273401 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", 
- "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46299093655589124 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45048958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4351728723404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-v0.1/4d801ab4-0c2d-445a-beb6-4de824618e75.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-v0.1/4d801ab4-0c2d-445a-beb6-4de824618e75.json deleted file mode 100644 index 182cbbf3774429d344f532ac20204ec837958dec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-v0.1/4d801ab4-0c2d-445a-beb6-4de824618e75.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v0.1/1762652580.479637", - "retrieved_timestamp": "1762652580.479639", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-v0.1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-Coder-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5521460835028835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6143056870893655 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3270392749244713 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3540268456375839 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.43944791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3927859042553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-v1.1/cc8e5b55-5b48-40c3-9e30-3c1740bc7da2.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-v1.1/cc8e5b55-5b48-40c3-9e30-3c1740bc7da2.json deleted file mode 100644 index f0e142aa8b3119112121473e459ebe731a954856..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-v1.1/cc8e5b55-5b48-40c3-9e30-3c1740bc7da2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v1.1/1762652580.479969", - "retrieved_timestamp": "1762652580.47997", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-v1.1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-Coder-v1.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.443236168920686 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6492289468853992 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5460725075528701 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5219270833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.523188164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-v1.6-r999/ff5bb366-3692-441c-8e8f-8c23c5143aae.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-v1.6-r999/ff5bb366-3692-441c-8e8f-8c23c5143aae.json deleted file mode 100644 index ea8d60b12a3fd8df126258cdc6754cff00040e2e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-v1.6-r999/ff5bb366-3692-441c-8e8f-8c23c5143aae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v1.6-r999/1762652580.480214", - "retrieved_timestamp": "1762652580.480215", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-v1.6-r999", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-Coder-v1.6-r999" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4432860366050967 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6492289468853992 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5657099697885196 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5219270833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.523188164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-v1.7-Vsm6/14b789c6-8b7f-4292-8ced-279e7ee856a5.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-v1.7-Vsm6/14b789c6-8b7f-4292-8ced-279e7ee856a5.json deleted file mode 100644 index 4c66a818acde2324a997328b90ae6a92f701cb7a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-Coder-v1.7-Vsm6/14b789c6-8b7f-4292-8ced-279e7ee856a5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-Coder-v1.7-Vsm6/1762652580.480439", - "retrieved_timestamp": "1762652580.4804401", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Viper-Coder-v1.7-Vsm6", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-Coder-v1.7-Vsm6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5003889679384035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6502342489348574 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4645015105740181 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39681208053691275 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47675 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5287566489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-OneCoder-UIGEN/5d22f1b7-c062-4c46-8da1-4c895fcf8b9c.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-OneCoder-UIGEN/5d22f1b7-c062-4c46-8da1-4c895fcf8b9c.json deleted file mode 100644 index 33bee959615bb12998e93c05a500ae9b4eb3a181..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Viper-OneCoder-UIGEN/5d22f1b7-c062-4c46-8da1-4c895fcf8b9c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Viper-OneCoder-UIGEN/1762652580.480654", - "retrieved_timestamp": "1762652580.480654", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Viper-OneCoder-UIGEN", - "developer": 
"prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Viper-OneCoder-UIGEN" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4691895282295421 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6046507657311738 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3867069486404834 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3422818791946309 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45141666666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.390375664893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Volans-Opus-14B-Exp/735058a7-c22e-42a7-94f5-d7e2459848b3.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Volans-Opus-14B-Exp/735058a7-c22e-42a7-94f5-d7e2459848b3.json deleted file mode 100644 index d69d4cf5d013cd39e43b435cf286b8feb941ec33..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_Volans-Opus-14B-Exp/735058a7-c22e-42a7-94f5-d7e2459848b3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_Volans-Opus-14B-Exp/1762652580.480862", - "retrieved_timestamp": "1762652580.480863", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/Volans-Opus-14B-Exp", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/Volans-Opus-14B-Exp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5867675545330834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.6521211711040636 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.425226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4871979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5384807180851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_WebMind-7B-v0.1/00637ba6-99e5-4940-94ab-a620ff248ca1.json b/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_WebMind-7B-v0.1/00637ba6-99e5-4940-94ab-a620ff248ca1.json deleted file mode 100644 index 777381a010e337aad8f138c748cf694f1dc3ac6b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/prithivMLmods/prithivMLmods_WebMind-7B-v0.1/00637ba6-99e5-4940-94ab-a620ff248ca1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/prithivMLmods_WebMind-7B-v0.1/1762652580.481075", - "retrieved_timestamp": "1762652580.481076", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "prithivMLmods/WebMind-7B-v0.1", - "developer": "prithivMLmods", - "inference_platform": "unknown", - "id": "prithivMLmods/WebMind-7B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5278161943642867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5433559211614739 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3648036253776435 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4537395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4279421542553192 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Oracle-14B/90a36ffd-8eeb-44e8-9b7b-dbd56238d0a6.json b/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Oracle-14B/90a36ffd-8eeb-44e8-9b7b-dbd56238d0a6.json deleted file mode 100644 index f0d5a659a8a1aa77de1254bca22504ccbef81abf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Oracle-14B/90a36ffd-8eeb-44e8-9b7b-dbd56238d0a6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2019_Oracle-14B/1762652580.4822989", - "retrieved_timestamp": "1762652580.4822989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2019/Oracle-14B", - "developer": "qingy2019", - "inference_platform": "unknown", - "id": "qingy2019/Oracle-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23583203677353867 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4611577021562399 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06419939577039276 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2575503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37166666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23819813829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": 
"MixtralForCausalLM", - "params_billions": 13.668 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Oracle-14B/fc5c5eff-8314-4cb2-8ba4-b562096cfe1f.json b/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Oracle-14B/fc5c5eff-8314-4cb2-8ba4-b562096cfe1f.json deleted file mode 100644 index 6fa503403d78c252740baa3bc0b800f0ccf00a1b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Oracle-14B/fc5c5eff-8314-4cb2-8ba4-b562096cfe1f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2019_Oracle-14B/1762652580.482562", - "retrieved_timestamp": "1762652580.482562", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2019/Oracle-14B", - "developer": "qingy2019", - "inference_platform": "unknown", - "id": "qingy2019/Oracle-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24007854714380067 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4622299618883472 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07250755287009064 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37033333333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2378656914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 13.668 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Qwen2.5-Math-14B-Instruct-Alpha/7bc9676d-6186-4b2d-8b4b-4a3786f3ed40.json b/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Qwen2.5-Math-14B-Instruct-Alpha/7bc9676d-6186-4b2d-8b4b-4a3786f3ed40.json deleted file mode 100644 index b8f8a5a4ccffa16b569d164cf53ac653196db4d8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Qwen2.5-Math-14B-Instruct-Alpha/7bc9676d-6186-4b2d-8b4b-4a3786f3ed40.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", 
- "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct-Alpha/1762652580.4831731", - "retrieved_timestamp": "1762652580.4831731", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2019/Qwen2.5-Math-14B-Instruct-Alpha", - "developer": "qingy2019", - "inference_platform": "unknown", - "id": "qingy2019/Qwen2.5-Math-14B-Instruct-Alpha" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5980830862112528 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6375080075350833 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31419939577039274 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4649375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5330784574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Qwen2.5-Math-14B-Instruct-Pro/c1a0b34a-d3b5-42b9-b779-b31b9678faed.json b/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Qwen2.5-Math-14B-Instruct-Pro/c1a0b34a-d3b5-42b9-b779-b31b9678faed.json deleted file mode 100644 index 0e746e807b78a472f0d6cb8015fcb6829dce4761..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Qwen2.5-Math-14B-Instruct-Pro/c1a0b34a-d3b5-42b9-b779-b31b9678faed.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct-Pro/1762652580.483387", - "retrieved_timestamp": "1762652580.483388", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"qingy2019/Qwen2.5-Math-14B-Instruct-Pro", - "developer": "qingy2019", - "inference_platform": "unknown", - "id": "qingy2019/Qwen2.5-Math-14B-Instruct-Pro" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1921678923035324 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5318689754519911 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37403125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35580119680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Qwen2.5-Math-14B-Instruct/46d47e9a-6378-4eb5-a43d-f8e6a7c51674.json b/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Qwen2.5-Math-14B-Instruct/46d47e9a-6378-4eb5-a43d-f8e6a7c51674.json deleted file mode 100644 index 8520f8182a39c13eae2f6bddd80d2671d9423559..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Qwen2.5-Math-14B-Instruct/46d47e9a-6378-4eb5-a43d-f8e6a7c51674.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct/1762652580.482764", - "retrieved_timestamp": "1762652580.482764", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2019/Qwen2.5-Math-14B-Instruct", - "developer": "qingy2019", - "inference_platform": "unknown", - "id": "qingy2019/Qwen2.5-Math-14B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6066259746361875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6350068875885949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724832214765101 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4757291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5330784574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Qwen2.5-Math-14B-Instruct/5a2e7119-5fe6-4d3c-8706-01e22ef5b121.json b/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Qwen2.5-Math-14B-Instruct/5a2e7119-5fe6-4d3c-8706-01e22ef5b121.json deleted file mode 100644 index 488958718da47615860bfc48ff0ba3aa47f779e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Qwen2.5-Math-14B-Instruct/5a2e7119-5fe6-4d3c-8706-01e22ef5b121.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Math-14B-Instruct/1762652580.48299", - "retrieved_timestamp": "1762652580.4829912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2019/Qwen2.5-Math-14B-Instruct", - "developer": "qingy2019", - "inference_platform": "unknown", - "id": "qingy2019/Qwen2.5-Math-14B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6005310354304356 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6356492397286339 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4756666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5339095744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Qwen2.5-Ultimate-14B-Instruct/655920b7-5687-4555-8890-ab1d08f3f00d.json b/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Qwen2.5-Ultimate-14B-Instruct/655920b7-5687-4555-8890-ab1d08f3f00d.json deleted file mode 100644 index 0c44a01a66417e729dee3790d6c0c924cda587d8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2019/qingy2019_Qwen2.5-Ultimate-14B-Instruct/655920b7-5687-4555-8890-ab1d08f3f00d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2019_Qwen2.5-Ultimate-14B-Instruct/1762652580.483648", - "retrieved_timestamp": "1762652580.483649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2019/Qwen2.5-Ultimate-14B-Instruct", - "developer": "qingy2019", - "inference_platform": "unknown", - "id": "qingy2019/Qwen2.5-Ultimate-14B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39380177927897975 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5841561592804249 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2892749244712991 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4135 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.4929355053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Benchmaxx-Llama-3.2-1B-Instruct/52ed2d5b-d9be-4f3f-b193-8d4cca4ded62.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Benchmaxx-Llama-3.2-1B-Instruct/52ed2d5b-d9be-4f3f-b193-8d4cca4ded62.json deleted file mode 100644 index 8b01baf8045e156c9fdfd232084aa2823c65e75a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Benchmaxx-Llama-3.2-1B-Instruct/52ed2d5b-d9be-4f3f-b193-8d4cca4ded62.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_Benchmaxx-Llama-3.2-1B-Instruct/1762652580.483871", - "retrieved_timestamp": "1762652580.483871", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/Benchmaxx-Llama-3.2-1B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Benchmaxx-Llama-3.2-1B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20136016879657087 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8269136508088061 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48036253776435045 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2835570469798658 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3446354166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11128656914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Eyas-17B-Instruct/c45cc504-88b0-4110-9650-47f4d328f769.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Eyas-17B-Instruct/c45cc504-88b0-4110-9650-47f4d328f769.json deleted file mode 100644 index 
11f33d55ad9b5a7f2bae58694490267a2b59617c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Eyas-17B-Instruct/c45cc504-88b0-4110-9650-47f4d328f769.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_Eyas-17B-Instruct/1762652580.484141", - "retrieved_timestamp": "1762652580.484141", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/Eyas-17B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Eyas-17B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6574588757829227 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6084550080292097 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24697885196374622 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45216666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43425864361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 17.431 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Falcon3-2x10B-MoE-Instruct/302e9f42-b9fa-4e2b-acda-70c391f9b6bc.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Falcon3-2x10B-MoE-Instruct/302e9f42-b9fa-4e2b-acda-70c391f9b6bc.json deleted file mode 100644 index 6ca528ac61c371c328e038d13ffc4c448893958a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Falcon3-2x10B-MoE-Instruct/302e9f42-b9fa-4e2b-acda-70c391f9b6bc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_Falcon3-2x10B-MoE-Instruct/1762652580.484361", - "retrieved_timestamp": "1762652580.484362", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/Falcon3-2x10B-MoE-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Falcon3-2x10B-MoE-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7849783020164276 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6184925726037823 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2794561933534743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42835416666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44232047872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 18.799 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Fusion-14B-Instruct/123331fd-a4fb-4dc6-a30e-17f230618df9.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Fusion-14B-Instruct/123331fd-a4fb-4dc6-a30e-17f230618df9.json deleted file mode 100644 index d54eb875f0c06afb2ae25a0022d4f24618c9b194..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Fusion-14B-Instruct/123331fd-a4fb-4dc6-a30e-17f230618df9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_Fusion-14B-Instruct/1762652580.4845738", - "retrieved_timestamp": "1762652580.484575", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/Fusion-14B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Fusion-14B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.7259770741632203 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6395930812164231 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3368580060422961 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3548657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44004166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.504404920212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Fusion2-14B-Instruct/cc17acb9-0f4e-46a9-a250-eb79a0fedc3f.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Fusion2-14B-Instruct/cc17acb9-0f4e-46a9-a250-eb79a0fedc3f.json deleted file mode 100644 index ae9e44fec4bac81ffedc585866d076e765f38a89..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Fusion2-14B-Instruct/cc17acb9-0f4e-46a9-a250-eb79a0fedc3f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_Fusion2-14B-Instruct/1762652580.4848042", - "retrieved_timestamp": "1762652580.4848042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/Fusion2-14B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Fusion2-14B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6064010159709571 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.611852372286455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31268882175226587 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46338541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5050698138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Fusion4-14B-Instruct/bb7b828c-07a0-4530-8c2e-8e4b6370cbb4.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Fusion4-14B-Instruct/bb7b828c-07a0-4530-8c2e-8e4b6370cbb4.json deleted file mode 100644 index e4b099f0088d61dc17c7b611c58e70e799ee70b2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Fusion4-14B-Instruct/bb7b828c-07a0-4530-8c2e-8e4b6370cbb4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_Fusion4-14B-Instruct/1762652580.4850292", - "retrieved_timestamp": "1762652580.48503", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/Fusion4-14B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Fusion4-14B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7648949232480928 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6542520469477617 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on 
MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5193650265957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_OwO-14B-Instruct/f524ebb6-64cb-43e3-8cff-6305ef122890.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_OwO-14B-Instruct/f524ebb6-64cb-43e3-8cff-6305ef122890.json deleted file mode 100644 index 7c4acf0b8bb6ec71d2968aef6a43c0f1616d3143..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_OwO-14B-Instruct/f524ebb6-64cb-43e3-8cff-6305ef122890.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_OwO-14B-Instruct/1762652580.485259", - "retrieved_timestamp": "1762652580.485259", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/OwO-14B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/OwO-14B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1383119013107444 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6164807172760662 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4161631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640939597315436 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44068749999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5181183510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_QwEnlarge-16B-Instruct/dd44686d-13da-4c88-81d3-6d01676baa4e.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_QwEnlarge-16B-Instruct/dd44686d-13da-4c88-81d3-6d01676baa4e.json deleted file mode 100644 index 
dae4c6c16f3ee5d1e9eed5cec136578216fe9fdf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_QwEnlarge-16B-Instruct/dd44686d-13da-4c88-81d3-6d01676baa4e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_QwEnlarge-16B-Instruct/1762652580.485478", - "retrieved_timestamp": "1762652580.4854789", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/QwEnlarge-16B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/QwEnlarge-16B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7801821389468832 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5949341698087998 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45996978851963743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.410125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44755651595744683 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 15.871 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_QwQ-14B-Math-v0.2/4092651d-1d14-408d-922d-6189858aab36.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_QwQ-14B-Math-v0.2/4092651d-1d14-408d-922d-6189858aab36.json deleted file mode 100644 index fd7a4a01bc23bc8dcc8154f9de032b5bee4e41b3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_QwQ-14B-Math-v0.2/4092651d-1d14-408d-922d-6189858aab36.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_QwQ-14B-Math-v0.2/1762652580.48586", - "retrieved_timestamp": "1762652580.4858618", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/QwQ-14B-Math-v0.2", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/QwQ-14B-Math-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33909692948044523 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.573097955260854 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4811178247734139 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40209374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47997007978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwarkstar-4B-Instruct-Preview/701a4aa4-b057-42d8-8b89-dd59950d1981.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwarkstar-4B-Instruct-Preview/701a4aa4-b057-42d8-8b89-dd59950d1981.json deleted file mode 100644 index 42d5746ee90b764e447629a21fd227d5de469402..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwarkstar-4B-Instruct-Preview/701a4aa4-b057-42d8-8b89-dd59950d1981.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwarkstar-4B-Instruct-Preview/1762652580.4865122", - "retrieved_timestamp": "1762652580.486513", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/Qwarkstar-4B-Instruct-Preview", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Qwarkstar-4B-Instruct-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.5324372664530114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43584381808469397 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38959374999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.250249335106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 4.473 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwarkstar-4B/9f586b02-3514-46f7-b1df-4e78f286893e.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwarkstar-4B/9f586b02-3514-46f7-b1df-4e78f286893e.json deleted file mode 100644 index 25fb349a46ea2582ec2f211ceeff55e1c7cf36c6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwarkstar-4B/9f586b02-3514-46f7-b1df-4e78f286893e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwarkstar-4B/1762652580.486229", - "retrieved_timestamp": "1762652580.4862301", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/Qwarkstar-4B", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Qwarkstar-4B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19941200459225966 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40149118131308104 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - 
"metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44283333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24251994680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 4.473 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwen2.5-Coder-Draft-1.5B-Instruct/40662202-f976-4dc0-acf2-f4794bb5d744.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwen2.5-Coder-Draft-1.5B-Instruct/40662202-f976-4dc0-acf2-f4794bb5d744.json deleted file mode 100644 index dd23ddc7a476a84751542774b72cd4541be5e2ca..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwen2.5-Coder-Draft-1.5B-Instruct/40662202-f976-4dc0-acf2-f4794bb5d744.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-Coder-Draft-1.5B-Instruct/1762652580.487137", - "retrieved_timestamp": "1762652580.487138", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4125110262991086 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3836795503038973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1578549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35800000000000004 - } - }, - { - "evaluation_name": 
"MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22440159574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwen2.5-Math-14B-Instruct-Alpha/011f32a0-458f-4bea-8192-b18a19ddd0c7.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwen2.5-Math-14B-Instruct-Alpha/011f32a0-458f-4bea-8192-b18a19ddd0c7.json deleted file mode 100644 index ab4c7d40165d2fb661907fc3dcfc9dad11612aeb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwen2.5-Math-14B-Instruct-Alpha/011f32a0-458f-4bea-8192-b18a19ddd0c7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-Math-14B-Instruct-Alpha/1762652580.48737", - "retrieved_timestamp": "1762652580.487371", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/Qwen2.5-Math-14B-Instruct-Alpha", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Qwen2.5-Math-14B-Instruct-Alpha" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7704402097545624 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.646486159387426 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42900302114803623 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.348993288590604 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40209374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49659242021276595 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwen2.5-Math-14B-Instruct-Preview/aab84d55-c491-402c-9ed0-59347573fea9.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwen2.5-Math-14B-Instruct-Preview/aab84d55-c491-402c-9ed0-59347573fea9.json deleted file mode 100644 index d5c4aa56c3261b8189250763cdc7fc297e68e14c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwen2.5-Math-14B-Instruct-Preview/aab84d55-c491-402c-9ed0-59347573fea9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-Math-14B-Instruct-Preview/1762652580.487701", - "retrieved_timestamp": "1762652580.4877021", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/Qwen2.5-Math-14B-Instruct-Preview", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Qwen2.5-Math-14B-Instruct-Preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7825802204816554 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6293942245934432 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47583081570996977 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4114583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49933510638297873 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwen2.6-14B-Instruct/c27064c4-93d1-41a1-a61f-cde7a991b047.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwen2.6-14B-Instruct/c27064c4-93d1-41a1-a61f-cde7a991b047.json deleted file mode 100644 index 79b7eb6d3f9a9dee6f55d259d93d34e26a1fa126..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwen2.6-14B-Instruct/c27064c4-93d1-41a1-a61f-cde7a991b047.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.6-14B-Instruct/1762652580.48806", - "retrieved_timestamp": "1762652580.488061", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/Qwen2.6-14B-Instruct", - "developer": "qingy2024", - "inference_platform": "unknown", - "id": "qingy2024/Qwen2.6-14B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5810970447302047 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6394142844483001 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30513595166163143 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37919463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4569375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5285073138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwen2.6-Math-14B-Instruct/37822fb0-4ada-4413-aa77-6938678994d9.json b/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwen2.6-Math-14B-Instruct/37822fb0-4ada-4413-aa77-6938678994d9.json deleted file mode 100644 index 65752592e763e833e71eaa9d74f6d02476dc354d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/qingy2024/qingy2024_Qwen2.6-Math-14B-Instruct/37822fb0-4ada-4413-aa77-6938678994d9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.6-Math-14B-Instruct/1762652580.488592", - "retrieved_timestamp": "1762652580.4885938", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qingy2024/Qwen2.6-Math-14B-Instruct", - "developer": "qingy2024", - 
"inference_platform": "unknown", - "id": "qingy2024/Qwen2.6-Math-14B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38623186478543603 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6324437508110833 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42900302114803623 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4758541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241023936170213 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/raphgg/raphgg_test-2.5-72B/133866e4-6e3a-4d88-95f3-d7e1bd414988.json b/leaderboard_data/HFOpenLLMv2/raphgg/raphgg_test-2.5-72B/133866e4-6e3a-4d88-95f3-d7e1bd414988.json deleted file mode 100644 index d0cb03da043ea981e6eb7256f4726e11c8e49149..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/raphgg/raphgg_test-2.5-72B/133866e4-6e3a-4d88-95f3-d7e1bd414988.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/raphgg_test-2.5-72B/1762652580.489263", - "retrieved_timestamp": "1762652580.489265", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "raphgg/test-2.5-72B", - "developer": "raphgg", - "inference_platform": "unknown", - "id": "raphgg/test-2.5-72B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8437047035199936 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7266099425567868 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4108761329305136 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48118750000000005 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5836934840425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/rasyosef/rasyosef_Mistral-NeMo-Minitron-8B-Chat/cb8d28e5-d423-4a62-8b73-7542fb990d8e.json b/leaderboard_data/HFOpenLLMv2/rasyosef/rasyosef_Mistral-NeMo-Minitron-8B-Chat/cb8d28e5-d423-4a62-8b73-7542fb990d8e.json deleted file mode 100644 index de266d28ce906643746975c1b16292817a91fd75..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/rasyosef/rasyosef_Mistral-NeMo-Minitron-8B-Chat/cb8d28e5-d423-4a62-8b73-7542fb990d8e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rasyosef_Mistral-NeMo-Minitron-8B-Chat/1762652580.4896698", - "retrieved_timestamp": "1762652580.489672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rasyosef/Mistral-NeMo-Minitron-8B-Chat", - "developer": "rasyosef", - "inference_platform": "unknown", - "id": "rasyosef/Mistral-NeMo-Minitron-8B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4451843331249973 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47594353379058535 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.027190332326283987 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { 
- "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4304270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2403590425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 8.414 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/rasyosef/rasyosef_Phi-1_5-Instruct-v0.1/e4d90e2b-f510-4941-8e10-be027693c3d4.json b/leaderboard_data/HFOpenLLMv2/rasyosef/rasyosef_Phi-1_5-Instruct-v0.1/e4d90e2b-f510-4941-8e10-be027693c3d4.json deleted file mode 100644 index 3863625849379e2a4361cf89e11b50714469d54e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/rasyosef/rasyosef_Phi-1_5-Instruct-v0.1/e4d90e2b-f510-4941-8e10-be027693c3d4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rasyosef_Phi-1_5-Instruct-v0.1/1762652580.4902148", - "retrieved_timestamp": "1762652580.490216", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rasyosef/Phi-1_5-Instruct-v0.1", - "developer": "rasyosef", - "inference_platform": "unknown", - "id": "rasyosef/Phi-1_5-Instruct-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24022815019703275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3117898107092894 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34215625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15616688829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "PhiForCausalLM", - "params_billions": 1.415 - } -} \ 
diff --git a/leaderboard_data/HFOpenLLMv2/rasyosef/rasyosef_phi-2-instruct-apo/f56f3dda-a774-45d7-b949-b5e04174a413.json b/leaderboard_data/HFOpenLLMv2/rasyosef/rasyosef_phi-2-instruct-apo/f56f3dda-a774-45d7-b949-b5e04174a413.json
deleted file mode 100644
index 1bcab473c3565ee5072ce71f0cfdba65a7220044..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/rasyosef/rasyosef_phi-2-instruct-apo/f56f3dda-a774-45d7-b949-b5e04174a413.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/rasyosef_phi-2-instruct-apo/1762652580.490494",
-  "retrieved_timestamp": "1762652580.490495",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "evaluation_source": {
-    "evaluation_source_name": "HF Open LLM v2",
-    "evaluation_source_type": "leaderboard"
-  },
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party"
-  },
-  "model_info": {
-    "name": "rasyosef/phi-2-instruct-apo",
-    "developer": "rasyosef",
-    "inference_platform": "unknown",
-    "id": "rasyosef/phi-2-instruct-apo"
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.31459194936102874
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.44450964630048634
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.030211480362537766
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2701342281879195
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.33421875
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.21550864361702127
-      }
-    }
-  ],
-  "additional_details": {
-    "precision": "float16",
-    "architecture": "PhiForCausalLM",
-    "params_billions": 2.775
-  }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/rasyosef/rasyosef_phi-2-instruct-v0.1/556eef3e-7c58-446d-acc5-26af0413d2bc.json b/leaderboard_data/HFOpenLLMv2/rasyosef/rasyosef_phi-2-instruct-v0.1/556eef3e-7c58-446d-acc5-26af0413d2bc.json
deleted file mode 100644
index f96f0075d04784610e82f858f9e268feb6397fdd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/rasyosef/rasyosef_phi-2-instruct-v0.1/556eef3e-7c58-446d-acc5-26af0413d2bc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
"hfopenllm_v2/rasyosef_phi-2-instruct-v0.1/1762652580.490772", - "retrieved_timestamp": "1762652580.490773", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rasyosef/phi-2-instruct-v0.1", - "developer": "rasyosef", - "inference_platform": "unknown", - "id": "rasyosef/phi-2-instruct-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3681476260765879 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47261184292654473 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22465093085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "PhiForCausalLM", - "params_billions": 2.775 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/realtreetune/realtreetune_rho-1b-sft-MATH/86234365-2d3e-4d49-96e8-8f034990c902.json b/leaderboard_data/HFOpenLLMv2/realtreetune/realtreetune_rho-1b-sft-MATH/86234365-2d3e-4d49-96e8-8f034990c902.json deleted file mode 100644 index 0ee2b9e2f1336546ac7901f8ef4b0b7b45a36dc3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/realtreetune/realtreetune_rho-1b-sft-MATH/86234365-2d3e-4d49-96e8-8f034990c902.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/realtreetune_rho-1b-sft-MATH/1762652580.4910588", - "retrieved_timestamp": "1762652580.49106", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "realtreetune/rho-1b-sft-MATH", - "developer": "realtreetune", - "inference_platform": "unknown", - "id": 
"realtreetune/rho-1b-sft-MATH" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.212101668018635 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3144153389594046 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03474320241691843 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34584375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11170212765957446 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.1 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/redrix/redrix_AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/60e8f886-62fa-444a-8193-273905cbd4e8.json b/leaderboard_data/HFOpenLLMv2/redrix/redrix_AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/60e8f886-62fa-444a-8193-273905cbd4e8.json deleted file mode 100644 index ecffb74dded8504afd2bd1f04f187d23c7cbff1f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/redrix/redrix_AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/60e8f886-62fa-444a-8193-273905cbd4e8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/redrix_AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS/1762652580.493407", - "retrieved_timestamp": "1762652580.493408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS", - "developer": "redrix", - "inference_platform": "unknown", - "id": "redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5359590331431713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5128840998052852 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11329305135951662 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38178124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3179853723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/redrix/redrix_patricide-12B-Unslop-Mell/16052a72-b235-47df-ac4c-fe54e49b9131.json b/leaderboard_data/HFOpenLLMv2/redrix/redrix_patricide-12B-Unslop-Mell/16052a72-b235-47df-ac4c-fe54e49b9131.json deleted file mode 100644 index a9d4ced157a0ebcf5dd5633d5611e0e91f715269..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/redrix/redrix_patricide-12B-Unslop-Mell/16052a72-b235-47df-ac4c-fe54e49b9131.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/redrix_patricide-12B-Unslop-Mell/1762652580.4937751", - "retrieved_timestamp": "1762652580.4937768", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "redrix/patricide-12B-Unslop-Mell", - "developer": "redrix", - "inference_platform": "unknown", - "id": "redrix/patricide-12B-Unslop-Mell" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40739016919551235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5398666865853622 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13141993957703926 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4025833333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3570478723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/rhplus0831/rhplus0831_maid-yuzu-v7/65e47b2d-982b-4fa8-b5bf-e002cf3cc293.json b/leaderboard_data/HFOpenLLMv2/rhplus0831/rhplus0831_maid-yuzu-v7/65e47b2d-982b-4fa8-b5bf-e002cf3cc293.json deleted file mode 100644 index 1d840db6917d0c792f636119eaa3f192bdf91891..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/rhplus0831/rhplus0831_maid-yuzu-v7/65e47b2d-982b-4fa8-b5bf-e002cf3cc293.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rhplus0831_maid-yuzu-v7/1762652580.494505", - "retrieved_timestamp": "1762652580.494506", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rhplus0831/maid-yuzu-v7", - "developer": "rhplus0831", - "inference_platform": "unknown", - "id": "rhplus0831/maid-yuzu-v7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6462430794735745 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.480491692312673 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41362499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35397273936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/rhymes-ai/rhymes-ai_Aria/611c449e-3d86-4dea-94a8-a2b7719fa1ae.json b/leaderboard_data/HFOpenLLMv2/rhymes-ai/rhymes-ai_Aria/611c449e-3d86-4dea-94a8-a2b7719fa1ae.json deleted file mode 100644 index 44c8c405346375aaf04a5a6f532e8f84e9e11331..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/rhymes-ai/rhymes-ai_Aria/611c449e-3d86-4dea-94a8-a2b7719fa1ae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rhymes-ai_Aria/1762652580.4949272", - "retrieved_timestamp": "1762652580.494928", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rhymes-ai/Aria", - "developer": "rhymes-ai", - "inference_platform": "unknown", - "id": "rhymes-ai/Aria" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4773079872516035 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5695312446413633 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1933534743202417 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44049202127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "AriaForConditionalGeneration", - "params_billions": 25.307 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/rmdhirr/rmdhirr_Gluon-8B/a1f5e06b-17f7-41d1-ab9d-c0e4b22d10cf.json b/leaderboard_data/HFOpenLLMv2/rmdhirr/rmdhirr_Gluon-8B/a1f5e06b-17f7-41d1-ab9d-c0e4b22d10cf.json deleted file mode 100644 index 220a39cca0b15d37ee471a150e49328d69cdf902..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/rmdhirr/rmdhirr_Gluon-8B/a1f5e06b-17f7-41d1-ab9d-c0e4b22d10cf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rmdhirr_Gluon-8B/1762652580.496151", - "retrieved_timestamp": 
"1762652580.4961522", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rmdhirr/Gluon-8B", - "developer": "rmdhirr", - "inference_platform": "unknown", - "id": "rmdhirr/Gluon-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5052848663767692 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5153305292144984 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14425981873111782 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4038854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38081781914893614 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/rombodawg/rombodawg_Rombos-LLM-V2.6-Nemotron-70b/caf5de06-ab13-45e4-ac51-d4e40796952e.json b/leaderboard_data/HFOpenLLMv2/rombodawg/rombodawg_Rombos-LLM-V2.6-Nemotron-70b/caf5de06-ab13-45e4-ac51-d4e40796952e.json deleted file mode 100644 index d02caeb71b64290b6274f9e47e64d99b9abe18c9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/rombodawg/rombodawg_Rombos-LLM-V2.6-Nemotron-70b/caf5de06-ab13-45e4-ac51-d4e40796952e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.6-Nemotron-70b/1762652580.499233", - "retrieved_timestamp": "1762652580.499234", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rombodawg/Rombos-LLM-V2.6-Nemotron-70b", - "developer": "rombodawg", - "inference_platform": "unknown", - "id": "rombodawg/Rombos-LLM-V2.6-Nemotron-70b" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7526551771521784 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6937699482580332 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3330815709969788 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40604026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46686458333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5329122340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/rombodawg/rombodawg_rombos_Replete-Coder-Instruct-8b-Merged/929abd2b-3f19-4df3-81ab-406751d52919.json b/leaderboard_data/HFOpenLLMv2/rombodawg/rombodawg_rombos_Replete-Coder-Instruct-8b-Merged/929abd2b-3f19-4df3-81ab-406751d52919.json deleted file mode 100644 index 92aa193c080ffcd604d128e07c1bf5e786e31bea..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/rombodawg/rombodawg_rombos_Replete-Coder-Instruct-8b-Merged/929abd2b-3f19-4df3-81ab-406751d52919.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rombodawg_rombos_Replete-Coder-Instruct-8b-Merged/1762652580.499815", - "retrieved_timestamp": "1762652580.499816", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rombodawg/rombos_Replete-Coder-Instruct-8b-Merged", - "developer": "rombodawg", - "inference_platform": "unknown", - "id": "rombodawg/rombos_Replete-Coder-Instruct-8b-Merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5387571643239937 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.4461693860075828 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18085106382978725 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/rootxhacker/rootxhacker_Apollo-70B/14421b7b-6f4d-4b4f-91e1-27a9c0919498.json b/leaderboard_data/HFOpenLLMv2/rootxhacker/rootxhacker_Apollo-70B/14421b7b-6f4d-4b4f-91e1-27a9c0919498.json deleted file mode 100644 index cf85dff6523bd1e6b1faf49a9556df48de54ebb6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/rootxhacker/rootxhacker_Apollo-70B/14421b7b-6f4d-4b4f-91e1-27a9c0919498.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rootxhacker_Apollo-70B/1762652580.500333", - "retrieved_timestamp": "1762652580.500333", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rootxhacker/Apollo-70B", - "developer": "rootxhacker", - "inference_platform": "unknown", - "id": "rootxhacker/Apollo-70B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5098560707810831 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6804215148524603 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5611782477341389 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45721476510067116 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4947708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5279255319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 70.554 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/rootxhacker/rootxhacker_Apollo_v2-32B/2a3e824e-8fb2-41ac-b548-30ea18ecdceb.json b/leaderboard_data/HFOpenLLMv2/rootxhacker/rootxhacker_Apollo_v2-32B/2a3e824e-8fb2-41ac-b548-30ea18ecdceb.json deleted file mode 100644 index c831cce6e48174b0106cbfd7097dcac45683eb3f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/rootxhacker/rootxhacker_Apollo_v2-32B/2a3e824e-8fb2-41ac-b548-30ea18ecdceb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rootxhacker_Apollo_v2-32B/1762652580.500606", - "retrieved_timestamp": "1762652580.500606", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rootxhacker/Apollo_v2-32B", - "developer": "rootxhacker", - "inference_platform": "unknown", - "id": "rootxhacker/Apollo_v2-32B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4280486885907171 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7072274795963693 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42749244712990936 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3783557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4993854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5869348404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at 
diff --git a/leaderboard_data/HFOpenLLMv2/rootxhacker/rootxhacker_apollo-7B/ce364468-f5ef-4a29-8026-89e455fa4350.json b/leaderboard_data/HFOpenLLMv2/rootxhacker/rootxhacker_apollo-7B/ce364468-f5ef-4a29-8026-89e455fa4350.json
deleted file mode 100644
index 2d8704f459fac21852a05bf9dc7b49fb595b5216..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/rootxhacker/rootxhacker_apollo-7B/ce364468-f5ef-4a29-8026-89e455fa4350.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/rootxhacker_apollo-7B/1762652580.500841",
-  "retrieved_timestamp": "1762652580.500842",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "evaluation_source": {
-    "evaluation_source_name": "HF Open LLM v2",
-    "evaluation_source_type": "leaderboard"
-  },
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party"
-  },
-  "model_info": {
-    "name": "rootxhacker/apollo-7B",
-    "developer": "rootxhacker",
-    "inference_platform": "unknown",
-    "id": "rootxhacker/apollo-7B"
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.29533304964161755
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.3636262699883149
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.0256797583081571
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2785234899328859
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.41312499999999996
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.17478390957446807
-      }
-    }
-  ],
-  "additional_details": {
-    "precision": "float16",
-    "architecture": "Qwen2ForCausalLM",
-    "params_billions": 7.616
-  }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/rsh345/rsh345_mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/18284816-2f69-41c5-8cf3-5209ed77cb7d.json b/leaderboard_data/HFOpenLLMv2/rsh345/rsh345_mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/18284816-2f69-41c5-8cf3-5209ed77cb7d.json
deleted file mode 100644
index 7b65e4ae48259f5465922368fd326f29eaecdb27..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/rsh345/rsh345_mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/18284816-2f69-41c5-8cf3-5209ed77cb7d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
"evaluation_id": "hfopenllm_v2/rsh345_mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B/1762652580.501065", - "retrieved_timestamp": "1762652580.501066", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B", - "developer": "rsh345", - "inference_platform": "unknown", - "id": "rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3891807071902552 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5188437309746964 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07326283987915408 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3028523489932886 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46719791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30535239361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/rubenroy/rubenroy_Geneva-12B-GCv2-5m/e6649e50-54ba-4788-a3b4-5aa3d6e8aed8.json b/leaderboard_data/HFOpenLLMv2/rubenroy/rubenroy_Geneva-12B-GCv2-5m/e6649e50-54ba-4788-a3b4-5aa3d6e8aed8.json deleted file mode 100644 index 3ccc1925aa2d5c72a9dcd5806076592b915e2b46..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/rubenroy/rubenroy_Geneva-12B-GCv2-5m/e6649e50-54ba-4788-a3b4-5aa3d6e8aed8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rubenroy_Geneva-12B-GCv2-5m/1762652580.501345", - "retrieved_timestamp": "1762652580.501346", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"rubenroy/Geneva-12B-GCv2-5m", - "developer": "rubenroy", - "inference_platform": "unknown", - "id": "rubenroy/Geneva-12B-GCv2-5m" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2586381911106974 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5278373390214104 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08006042296072508 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3524791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3249667553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/rubenroy/rubenroy_Gilgamesh-72B/b577bd26-a9f9-4a50-bd2b-f47bc5222748.json b/leaderboard_data/HFOpenLLMv2/rubenroy/rubenroy_Gilgamesh-72B/b577bd26-a9f9-4a50-bd2b-f47bc5222748.json deleted file mode 100644 index 3ae28ed271c8184d05db1a4c957eb928e954ecbc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/rubenroy/rubenroy_Gilgamesh-72B/b577bd26-a9f9-4a50-bd2b-f47bc5222748.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rubenroy_Gilgamesh-72B/1762652580.5016088", - "retrieved_timestamp": "1762652580.5016088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rubenroy/Gilgamesh-72B", - "developer": "rubenroy", - "inference_platform": "unknown", - "id": "rubenroy/Gilgamesh-72B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8486006019583594 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.7253327589560739 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46264583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5802027925531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/rubenroy/rubenroy_Zurich-14B-GCv2-5m/f9dca394-e108-48f3-a45d-a282f7b39098.json b/leaderboard_data/HFOpenLLMv2/rubenroy/rubenroy_Zurich-14B-GCv2-5m/f9dca394-e108-48f3-a45d-a282f7b39098.json deleted file mode 100644 index ed84cf306905dc58089b086301637ea07d73e783..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/rubenroy/rubenroy_Zurich-14B-GCv2-5m/f9dca394-e108-48f3-a45d-a282f7b39098.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/rubenroy_Zurich-14B-GCv2-5m/1762652580.5018299", - "retrieved_timestamp": "1762652580.5018299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "rubenroy/Zurich-14B-GCv2-5m", - "developer": "rubenroy", - "inference_platform": "unknown", - "id": "rubenroy/Zurich-14B-GCv2-5m" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6163679038285084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6308359017750411 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3074018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3615771812080537 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4874479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5232712765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ruizhe1217/ruizhe1217_sft-s1-qwen-0.5b/fd0e4ea3-ed10-487d-85d7-df5669bc8edc.json b/leaderboard_data/HFOpenLLMv2/ruizhe1217/ruizhe1217_sft-s1-qwen-0.5b/fd0e4ea3-ed10-487d-85d7-df5669bc8edc.json deleted file mode 100644 index 52b2985d2fb5d0a427e6ee38b7af8449e028a51a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ruizhe1217/ruizhe1217_sft-s1-qwen-0.5b/fd0e4ea3-ed10-487d-85d7-df5669bc8edc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ruizhe1217_sft-s1-qwen-0.5b/1762652580.502058", - "retrieved_timestamp": "1762652580.502059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ruizhe1217/sft-s1-qwen-0.5b", - "developer": "ruizhe1217", - "inference_platform": "unknown", - "id": "ruizhe1217/sft-s1-qwen-0.5b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27487510915482033 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33005365550588683 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27097315436241615 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31958333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1891622340425532 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 
-    "params_billions": 0.494
-  }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/rwitz/rwitz_go-bruins-v2/2f6a8cce-672f-4634-99ed-9d42df9cd26c.json b/leaderboard_data/HFOpenLLMv2/rwitz/rwitz_go-bruins-v2/2f6a8cce-672f-4634-99ed-9d42df9cd26c.json
deleted file mode 100644
index e45a514d8a154e630572fb89fc51e7d89c4a22ff..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/rwitz/rwitz_go-bruins-v2/2f6a8cce-672f-4634-99ed-9d42df9cd26c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/rwitz_go-bruins-v2/1762652580.5023239",
-  "retrieved_timestamp": "1762652580.502325",
-  "source_data": [
-    "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
-  ],
-  "evaluation_source": {
-    "evaluation_source_name": "HF Open LLM v2",
-    "evaluation_source_type": "leaderboard"
-  },
-  "source_metadata": {
-    "source_organization_name": "Hugging Face",
-    "evaluator_relationship": "third_party"
-  },
-  "model_info": {
-    "name": "rwitz/go-bruins-v2",
-    "developer": "rwitz",
-    "inference_platform": "unknown",
-    "id": "rwitz/go-bruins-v2"
-  },
-  "evaluation_results": [
-    {
-      "evaluation_name": "IFEval",
-      "metric_config": {
-        "evaluation_description": "Accuracy on IFEval",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.40958877999264176
-      }
-    },
-    {
-      "evaluation_name": "BBH",
-      "metric_config": {
-        "evaluation_description": "Accuracy on BBH",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.37988446841089685
-      }
-    },
-    {
-      "evaluation_name": "MATH Level 5",
-      "metric_config": {
-        "evaluation_description": "Exact Match on MATH Level 5",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.06722054380664652
-      }
-    },
-    {
-      "evaluation_name": "GPQA",
-      "metric_config": {
-        "evaluation_description": "Accuracy on GPQA",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2625838926174497
-      }
-    },
-    {
-      "evaluation_name": "MUSR",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MUSR",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.41375
-      }
-    },
-    {
-      "evaluation_name": "MMLU-PRO",
-      "metric_config": {
-        "evaluation_description": "Accuracy on MMLU-PRO",
-        "lower_is_better": false,
-        "score_type": "continuous",
-        "min_score": 0,
-        "max_score": 1
-      },
-      "score_details": {
-        "score": 0.2760970744680851
-      }
-    }
-  ],
-  "additional_details": {
-    "precision": "float16",
-    "architecture": "MistralForCausalLM",
-    "params_billions": 7.242
-  }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/sabersaleh/sabersaleh_Llama2-7B-DPO/c2ffce0d-069d-48bb-989c-6fb18bdd9059.json b/leaderboard_data/HFOpenLLMv2/sabersaleh/sabersaleh_Llama2-7B-DPO/c2ffce0d-069d-48bb-989c-6fb18bdd9059.json
deleted file mode 100644
index 39021843ec82f88e4ec83ccac4e30bdea770f370..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/sabersaleh/sabersaleh_Llama2-7B-DPO/c2ffce0d-069d-48bb-989c-6fb18bdd9059.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
-  "schema_version": "0.0.1",
-  "evaluation_id": "hfopenllm_v2/sabersaleh_Llama2-7B-DPO/1762652580.50325",
-  "retrieved_timestamp": "1762652580.503252",
"source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sabersaleh/Llama2-7B-DPO", - "developer": "sabersaleh", - "inference_platform": "unknown", - "id": "sabersaleh/Llama2-7B-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14533105493424114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3512218731420535 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4113645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16256648936170212 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/saishf/saishf_Fimbulvetr-Kuro-Lotus-10.7B/941a914d-0ca4-4896-9dfb-929c08c8651b.json b/leaderboard_data/HFOpenLLMv2/saishf/saishf_Fimbulvetr-Kuro-Lotus-10.7B/941a914d-0ca4-4896-9dfb-929c08c8651b.json deleted file mode 100644 index 9982a306597c9da8969b7bb591388df108868928..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/saishf/saishf_Fimbulvetr-Kuro-Lotus-10.7B/941a914d-0ca4-4896-9dfb-929c08c8651b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/saishf_Fimbulvetr-Kuro-Lotus-10.7B/1762652580.5057359", - "retrieved_timestamp": "1762652580.5057359", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "saishf/Fimbulvetr-Kuro-Lotus-10.7B", - "developer": "saishf", - "inference_platform": "unknown", - "id": "saishf/Fimbulvetr-Kuro-Lotus-10.7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": 
{ - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49394384677101205 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4342316286386943 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4445104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33892952127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/saishf/saishf_Neural-SOVLish-Devil-8B-L3/d12855a1-81cb-4fab-b36e-dbee6c6d69a9.json b/leaderboard_data/HFOpenLLMv2/saishf/saishf_Neural-SOVLish-Devil-8B-L3/d12855a1-81cb-4fab-b36e-dbee6c6d69a9.json deleted file mode 100644 index 435c4f71e97b3144f5f038f68ffbc175cf0a7ec0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/saishf/saishf_Neural-SOVLish-Devil-8B-L3/d12855a1-81cb-4fab-b36e-dbee6c6d69a9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/saishf_Neural-SOVLish-Devil-8B-L3/1762652580.506007", - "retrieved_timestamp": "1762652580.506007", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "saishf/Neural-SOVLish-Devil-8B-L3", - "developer": "saishf", - "inference_platform": "unknown", - "id": "saishf/Neural-SOVLish-Devil-8B-L3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41988036188424493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141802159065874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 
5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4109583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3807347074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/saishshinde15/saishshinde15_TethysAI_Base_Reasoning/74cb7205-e6c9-4faf-a84e-c15daa2ba62b.json b/leaderboard_data/HFOpenLLMv2/saishshinde15/saishshinde15_TethysAI_Base_Reasoning/74cb7205-e6c9-4faf-a84e-c15daa2ba62b.json deleted file mode 100644 index 925f7e8fb1203b60a3c55ad93521d1abffd66ccb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/saishshinde15/saishshinde15_TethysAI_Base_Reasoning/74cb7205-e6c9-4faf-a84e-c15daa2ba62b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/saishshinde15_TethysAI_Base_Reasoning/1762652580.5062242", - "retrieved_timestamp": "1762652580.5062249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "saishshinde15/TethysAI_Base_Reasoning", - "developer": "saishshinde15", - "inference_platform": "unknown", - "id": "saishshinde15/TethysAI_Base_Reasoning" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6368757119997164 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4518558867290183 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31419939577039274 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4074583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3236369680851064 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/saishshinde15/saishshinde15_TethysAI_Vortex/6e20bb3a-728d-40ef-b6ca-91b0dde02da4.json b/leaderboard_data/HFOpenLLMv2/saishshinde15/saishshinde15_TethysAI_Vortex/6e20bb3a-728d-40ef-b6ca-91b0dde02da4.json deleted file mode 100644 index 3956104f5e76eb80a34c98e3446f813e29f007b5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/saishshinde15/saishshinde15_TethysAI_Vortex/6e20bb3a-728d-40ef-b6ca-91b0dde02da4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/saishshinde15_TethysAI_Vortex/1762652580.5066721", - "retrieved_timestamp": "1762652580.5066729", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "saishshinde15/TethysAI_Vortex", - "developer": "saishshinde15", - "inference_platform": "unknown", - "id": "saishshinde15/TethysAI_Vortex" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4297718941297978 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4749261293502527 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3149546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44578125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3240525265957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/saishshinde15/saishshinde15_TethysAI_Vortex_Reasoning/79022531-2599-4c19-93e0-ecdbde7bf736.json b/leaderboard_data/HFOpenLLMv2/saishshinde15/saishshinde15_TethysAI_Vortex_Reasoning/79022531-2599-4c19-93e0-ecdbde7bf736.json deleted file mode 100644 index 6835a231745cd8bc27a42bb365afa3d470c9ec39..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/saishshinde15/saishshinde15_TethysAI_Vortex_Reasoning/79022531-2599-4c19-93e0-ecdbde7bf736.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/saishshinde15_TethysAI_Vortex_Reasoning/1762652580.506901", - "retrieved_timestamp": "1762652580.506902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "saishshinde15/TethysAI_Vortex_Reasoning", - "developer": "saishshinde15", - "inference_platform": "unknown", - "id": "saishshinde15/TethysAI_Vortex_Reasoning" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40211970903868405 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4693805860486275 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40844791666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3380984042553192 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sakaltcommunity/sakaltcommunity_novablast-preview/588d2387-29de-41bc-8233-674081948787.json b/leaderboard_data/HFOpenLLMv2/sakaltcommunity/sakaltcommunity_novablast-preview/588d2387-29de-41bc-8233-674081948787.json deleted file mode 100644 index bfb52575163224411483513bc3b10b34943e3eae..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sakaltcommunity/sakaltcommunity_novablast-preview/588d2387-29de-41bc-8233-674081948787.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sakaltcommunity_novablast-preview/1762652580.507118", - "retrieved_timestamp": "1762652580.5071192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sakaltcommunity/novablast-preview", - "developer": "sakaltcommunity", - "inference_platform": "unknown", - "id": "sakaltcommunity/novablast-preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4530279657974175 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7042765234852668 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48942598187311176 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5021145833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5915059840425532 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sakaltcommunity/sakaltcommunity_sakaltum-7b/5fdd75fd-6e57-4ba4-8b6a-58998ff88bd9.json b/leaderboard_data/HFOpenLLMv2/sakaltcommunity/sakaltcommunity_sakaltum-7b/5fdd75fd-6e57-4ba4-8b6a-58998ff88bd9.json deleted file mode 100644 index 18f1a2064273ff51961661a21dfe30a69b4b5ce6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sakaltcommunity/sakaltcommunity_sakaltum-7b/5fdd75fd-6e57-4ba4-8b6a-58998ff88bd9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sakaltcommunity_sakaltum-7b/1762652580.5073972", - "retrieved_timestamp": "1762652580.507398", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sakaltcommunity/sakaltum-7b", - 
"developer": "sakaltcommunity", - "inference_platform": "unknown", - "id": "sakaltcommunity/sakaltum-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2603868845773658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4575213514148995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3775 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2769281914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/saltlux/saltlux_luxia-21.4b-alignment-v1.0/fe959cc1-17bd-4e87-b9b7-84d3adddbedb.json b/leaderboard_data/HFOpenLLMv2/saltlux/saltlux_luxia-21.4b-alignment-v1.0/fe959cc1-17bd-4e87-b9b7-84d3adddbedb.json deleted file mode 100644 index c8d33fb12e7bc2489a853fbb796eab838dae3886..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/saltlux/saltlux_luxia-21.4b-alignment-v1.0/fe959cc1-17bd-4e87-b9b7-84d3adddbedb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/saltlux_luxia-21.4b-alignment-v1.0/1762652580.507964", - "retrieved_timestamp": "1762652580.5079648", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "saltlux/luxia-21.4b-alignment-v1.0", - "developer": "saltlux", - "inference_platform": "unknown", - "id": "saltlux/luxia-21.4b-alignment-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36929679915956326 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.6373342606775594 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09743202416918428 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43284374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34034242021276595 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.421 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/saltlux/saltlux_luxia-21.4b-alignment-v1.2/b89b30bb-fbaa-4ac6-8535-9f31cf87eb55.json b/leaderboard_data/HFOpenLLMv2/saltlux/saltlux_luxia-21.4b-alignment-v1.2/b89b30bb-fbaa-4ac6-8535-9f31cf87eb55.json deleted file mode 100644 index dbcd145baf6b56d820803ea4cd8a78d8bbc032b1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/saltlux/saltlux_luxia-21.4b-alignment-v1.2/b89b30bb-fbaa-4ac6-8535-9f31cf87eb55.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/saltlux_luxia-21.4b-alignment-v1.2/1762652580.508301", - "retrieved_timestamp": "1762652580.5083032", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "saltlux/luxia-21.4b-alignment-v1.2", - "developer": "saltlux", - "inference_platform": "unknown", - "id": "saltlux/luxia-21.4b-alignment-v1.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41153694419695297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6371180708112368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4458958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34732380319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 21.421 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sam-paech/sam-paech_Darkest-muse-v1/dae1ceb0-97b1-4285-b9db-912d7b4b01c7.json b/leaderboard_data/HFOpenLLMv2/sam-paech/sam-paech_Darkest-muse-v1/dae1ceb0-97b1-4285-b9db-912d7b4b01c7.json deleted file mode 100644 index ab42682da658ba258a1d3873871fe0b20e469a09..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sam-paech/sam-paech_Darkest-muse-v1/dae1ceb0-97b1-4285-b9db-912d7b4b01c7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sam-paech_Darkest-muse-v1/1762652580.508588", - "retrieved_timestamp": "1762652580.508589", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sam-paech/Darkest-muse-v1", - "developer": "sam-paech", - "inference_platform": "unknown", - "id": "sam-paech/Darkest-muse-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7344202272193336 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5968439530708949 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21450151057401812 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34395973154362414 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4502083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4183843085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sam-paech/sam-paech_Delirium-v1/78dd5568-0d0d-4cc5-ad1a-bfba857c827e.json b/leaderboard_data/HFOpenLLMv2/sam-paech/sam-paech_Delirium-v1/78dd5568-0d0d-4cc5-ad1a-bfba857c827e.json deleted file mode 100644 index 0113f7f92401c0e14344c7b0e30aa21db5d43cf7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sam-paech/sam-paech_Delirium-v1/78dd5568-0d0d-4cc5-ad1a-bfba857c827e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sam-paech_Delirium-v1/1762652580.508875", - "retrieved_timestamp": "1762652580.508876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sam-paech/Delirium-v1", - "developer": "sam-paech", - "inference_platform": "unknown", - "id": "sam-paech/Delirium-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7207564816908026 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5962113834521733 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2107250755287009 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45144791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4189660904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sam-paech/sam-paech_Quill-v1/248541b3-aeae-429d-93ae-06cc3bc82cd8.json b/leaderboard_data/HFOpenLLMv2/sam-paech/sam-paech_Quill-v1/248541b3-aeae-429d-93ae-06cc3bc82cd8.json deleted file mode 100644 index 16b15becb83937ec1f52e2a404becb11a76d9814..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sam-paech/sam-paech_Quill-v1/248541b3-aeae-429d-93ae-06cc3bc82cd8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/sam-paech_Quill-v1/1762652580.5091672", - "retrieved_timestamp": "1762652580.5091681", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sam-paech/Quill-v1", - "developer": "sam-paech", - "inference_platform": "unknown", - "id": "sam-paech/Quill-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.712213593265868 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5969226347989487 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2122356495468278 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33976510067114096 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45547916666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4171376329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sarvamai/sarvamai_OpenHathi-7B-Hi-v0.1-Base/e0c03300-a08f-409e-9f39-f00d5e9e126f.json b/leaderboard_data/HFOpenLLMv2/sarvamai/sarvamai_OpenHathi-7B-Hi-v0.1-Base/e0c03300-a08f-409e-9f39-f00d5e9e126f.json deleted file mode 100644 index 4b674c410041d34c79792ee7e363dcd6e9203d75..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sarvamai/sarvamai_OpenHathi-7B-Hi-v0.1-Base/e0c03300-a08f-409e-9f39-f00d5e9e126f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sarvamai_OpenHathi-7B-Hi-v0.1-Base/1762652580.509491", - "retrieved_timestamp": "1762652580.5094929", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sarvamai/OpenHathi-7B-Hi-v0.1-Base", - "developer": "sarvamai", - "inference_platform": "unknown", - "id": 
"sarvamai/OpenHathi-7B-Hi-v0.1-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18040244329490196 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33540458231510667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36584375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15433843085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.87 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/schnapss/schnapss_testmerge-7b/faa7be96-1419-48be-9b95-e97689296de0.json b/leaderboard_data/HFOpenLLMv2/schnapss/schnapss_testmerge-7b/faa7be96-1419-48be-9b95-e97689296de0.json deleted file mode 100644 index 338a4b297c83b4d6ae377060dd559c286b618b4d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/schnapss/schnapss_testmerge-7b/faa7be96-1419-48be-9b95-e97689296de0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/schnapss_testmerge-7b/1762652580.509877", - "retrieved_timestamp": "1762652580.509878", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "schnapss/testmerge-7b", - "developer": "schnapss", - "inference_platform": "unknown", - "id": "schnapss/testmerge-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39222817679313116 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5187478405637375 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06873111782477341 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4685625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30601728723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sci-m-wang/sci-m-wang_Mistral-7B-Instruct-sa-v0.1/8125700c-d9e7-4d6e-9b78-049331dd571b.json b/leaderboard_data/HFOpenLLMv2/sci-m-wang/sci-m-wang_Mistral-7B-Instruct-sa-v0.1/8125700c-d9e7-4d6e-9b78-049331dd571b.json deleted file mode 100644 index 865b14fd3d37fa96215f6385b3b5f21c891fc9d5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sci-m-wang/sci-m-wang_Mistral-7B-Instruct-sa-v0.1/8125700c-d9e7-4d6e-9b78-049331dd571b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sci-m-wang_Mistral-7B-Instruct-sa-v0.1/1762652580.510147", - "retrieved_timestamp": "1762652580.510148", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sci-m-wang/Mistral-7B-Instruct-sa-v0.1", - "developer": "sci-m-wang", - "inference_platform": "unknown", - "id": "sci-m-wang/Mistral-7B-Instruct-sa-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4335186194851882 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32727821561411724 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38999999999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2362034574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 14.483 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sci-m-wang/sci-m-wang_Phi-3-mini-4k-instruct-sa-v0.1/319484e0-12aa-4212-b55f-d19efdd2f719.json b/leaderboard_data/HFOpenLLMv2/sci-m-wang/sci-m-wang_Phi-3-mini-4k-instruct-sa-v0.1/319484e0-12aa-4212-b55f-d19efdd2f719.json deleted file mode 100644 index 5b084dfc7892c4222c0c532662ec7ed76d0775ad..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sci-m-wang/sci-m-wang_Phi-3-mini-4k-instruct-sa-v0.1/319484e0-12aa-4212-b55f-d19efdd2f719.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sci-m-wang_Phi-3-mini-4k-instruct-sa-v0.1/1762652580.510415", - "retrieved_timestamp": "1762652580.510418", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1", - "developer": "sci-m-wang", - "inference_platform": "unknown", - "id": "sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5020623057930734 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5502038722383045 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14803625377643503 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40730208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39852061170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "?", - "params_billions": 7.642 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sci-m-wang/sci-m-wang_deepseek-llm-7b-chat-sa-v0.1/182d68d5-9b03-41bc-850c-1f571c36e630.json b/leaderboard_data/HFOpenLLMv2/sci-m-wang/sci-m-wang_deepseek-llm-7b-chat-sa-v0.1/182d68d5-9b03-41bc-850c-1f571c36e630.json deleted file mode 100644 index 717e87276518b3a840ebbf6d6b33e1ec473451a6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sci-m-wang/sci-m-wang_deepseek-llm-7b-chat-sa-v0.1/182d68d5-9b03-41bc-850c-1f571c36e630.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sci-m-wang_deepseek-llm-7b-chat-sa-v0.1/1762652580.5106509", - "retrieved_timestamp": "1762652580.5106518", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sci-m-wang/deepseek-llm-7b-chat-sa-v0.1", - "developer": "sci-m-wang", - "inference_platform": "unknown", - "id": "sci-m-wang/deepseek-llm-7b-chat-sa-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4035935761557113 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37177200995276305 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25671140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4173125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22091090425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/senseable/senseable_WestLake-7B-v2/6ef15d50-74b7-4e09-856c-05343841e24b.json b/leaderboard_data/HFOpenLLMv2/senseable/senseable_WestLake-7B-v2/6ef15d50-74b7-4e09-856c-05343841e24b.json deleted file mode 100644 index 5ce3a919e3faa0576cf8b49629317e0deb430ddf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/senseable/senseable_WestLake-7B-v2/6ef15d50-74b7-4e09-856c-05343841e24b.json +++ /dev/null @@ -1,107 
+0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/senseable_WestLake-7B-v2/1762652580.511263", - "retrieved_timestamp": "1762652580.511264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "senseable/WestLake-7B-v2", - "developer": "senseable", - "inference_platform": "unknown", - "id": "senseable/WestLake-7B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4418620371724801 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4073276290688943 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39371874999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27642952127659576 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sethuiyer/sethuiyer_Llama-3.1-8B-Experimental-1206-Instruct/49334550-08eb-49a2-9cea-f90f22533ab1.json b/leaderboard_data/HFOpenLLMv2/sethuiyer/sethuiyer_Llama-3.1-8B-Experimental-1206-Instruct/49334550-08eb-49a2-9cea-f90f22533ab1.json deleted file mode 100644 index 05d1c135b206ded665a2ba90557dbe061150ff30..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sethuiyer/sethuiyer_Llama-3.1-8B-Experimental-1206-Instruct/49334550-08eb-49a2-9cea-f90f22533ab1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sethuiyer_Llama-3.1-8B-Experimental-1206-Instruct/1762652580.512954", - "retrieved_timestamp": "1762652580.512954", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - 
"name": "sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct", - "developer": "sethuiyer", - "inference_platform": "unknown", - "id": "sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6967014189018471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.510381184158217 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39657291666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35289228723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sethuiyer/sethuiyer_Llama-3.1-8B-Experimental-1208-Instruct/d4b778ea-ae70-437f-a295-772abc659027.json b/leaderboard_data/HFOpenLLMv2/sethuiyer/sethuiyer_Llama-3.1-8B-Experimental-1208-Instruct/d4b778ea-ae70-437f-a295-772abc659027.json deleted file mode 100644 index a074180c140b838e4765e64df240e9b75c44c178..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sethuiyer/sethuiyer_Llama-3.1-8B-Experimental-1208-Instruct/d4b778ea-ae70-437f-a295-772abc659027.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sethuiyer_Llama-3.1-8B-Experimental-1208-Instruct/1762652580.513202", - "retrieved_timestamp": "1762652580.513203", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct", - "developer": "sethuiyer", - "inference_platform": "unknown", - "id": "sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6099981382731153 - } - }, - { 
- "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49642264289263355 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3789895833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35106382978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sethuiyer/sethuiyer_Llamaverse-3.1-8B-Instruct/f0a224c2-037a-4229-bb00-5d76d3974078.json b/leaderboard_data/HFOpenLLMv2/sethuiyer/sethuiyer_Llamaverse-3.1-8B-Instruct/f0a224c2-037a-4229-bb00-5d76d3974078.json deleted file mode 100644 index 0f619db72d28f00cb68e4a3fc2ec1545c2783aa4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sethuiyer/sethuiyer_Llamaverse-3.1-8B-Instruct/f0a224c2-037a-4229-bb00-5d76d3974078.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sethuiyer_Llamaverse-3.1-8B-Instruct/1762652580.513652", - "retrieved_timestamp": "1762652580.513653", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sethuiyer/Llamaverse-3.1-8B-Instruct", - "developer": "sethuiyer", - "inference_platform": "unknown", - "id": "sethuiyer/Llamaverse-3.1-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6185410266980501 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5414159562743479 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3761666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3523105053191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sethuiyer/sethuiyer_Llamazing-3.1-8B-Instruct/9065a7df-dab7-4e3b-bbc5-01f2908c37b3.json b/leaderboard_data/HFOpenLLMv2/sethuiyer/sethuiyer_Llamazing-3.1-8B-Instruct/9065a7df-dab7-4e3b-bbc5-01f2908c37b3.json deleted file mode 100644 index fdf3eb3a000fb68db57d8f7c70c8027904a30aef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sethuiyer/sethuiyer_Llamazing-3.1-8B-Instruct/9065a7df-dab7-4e3b-bbc5-01f2908c37b3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sethuiyer_Llamazing-3.1-8B-Instruct/1762652580.513854", - "retrieved_timestamp": "1762652580.513855", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sethuiyer/Llamazing-3.1-8B-Instruct", - "developer": "sethuiyer", - "inference_platform": "unknown", - "id": "sethuiyer/Llamazing-3.1-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5711301568726534 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.529106967510303 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.054380664652567974 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39759374999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3606216755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/shadowml/shadowml_BeagSake-7B/2a71923c-8697-4b62-94fa-4c16874df7a7.json b/leaderboard_data/HFOpenLLMv2/shadowml/shadowml_BeagSake-7B/2a71923c-8697-4b62-94fa-4c16874df7a7.json deleted file mode 100644 index 80c8478e43538295fe2aebdd16d5bb038e02344e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/shadowml/shadowml_BeagSake-7B/2a71923c-8697-4b62-94fa-4c16874df7a7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/shadowml_BeagSake-7B/1762652580.514317", - "retrieved_timestamp": "1762652580.514318", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "shadowml/BeagSake-7B", - "developer": "shadowml", - "inference_platform": "unknown", - "id": "shadowml/BeagSake-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5215960318621258 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47110342371098474 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05060422960725076 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41235416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25847739361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/shadowml/shadowml_Mixolar-4x7b/65a2c055-9bb5-458d-8a65-89b363b47a3a.json b/leaderboard_data/HFOpenLLMv2/shadowml/shadowml_Mixolar-4x7b/65a2c055-9bb5-458d-8a65-89b363b47a3a.json deleted file mode 100644 index 
383b0fc4b23d9f9505fde19d2a073f278bf314db..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/shadowml/shadowml_Mixolar-4x7b/65a2c055-9bb5-458d-8a65-89b363b47a3a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/shadowml_Mixolar-4x7b/1762652580.5145578", - "retrieved_timestamp": "1762652580.514559", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "shadowml/Mixolar-4x7b", - "developer": "shadowml", - "inference_platform": "unknown", - "id": "shadowml/Mixolar-4x7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3893303102434873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5215949876221495 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053523936170215 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 36.099 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/shastraai/shastraai_Shastra-LLAMA2-Math-Commonsense-SFT/563e2894-10bf-43e1-af67-5cd97d52f033.json b/leaderboard_data/HFOpenLLMv2/shastraai/shastraai_Shastra-LLAMA2-Math-Commonsense-SFT/563e2894-10bf-43e1-af67-5cd97d52f033.json deleted file mode 100644 index 201c89478f602845fe368e36190e8957b5e51a6a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/shastraai/shastraai_Shastra-LLAMA2-Math-Commonsense-SFT/563e2894-10bf-43e1-af67-5cd97d52f033.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/shastraai_Shastra-LLAMA2-Math-Commonsense-SFT/1762652580.5147672", - "retrieved_timestamp": "1762652580.5147672", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "shastraai/Shastra-LLAMA2-Math-Commonsense-SFT", - "developer": "shastraai", - "inference_platform": "unknown", - "id": "shastraai/Shastra-LLAMA2-Math-Commonsense-SFT" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3041507644161935 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.384316753625765 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3604479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19971742021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/shivam9980/shivam9980_NEPALI-LLM/234f5f98-a5fc-417a-8463-186bf600993a.json b/leaderboard_data/HFOpenLLMv2/shivam9980/shivam9980_NEPALI-LLM/234f5f98-a5fc-417a-8463-186bf600993a.json deleted file mode 100644 index 08b751d541d9a02345042442b1b6c70e599b6f02..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/shivam9980/shivam9980_NEPALI-LLM/234f5f98-a5fc-417a-8463-186bf600993a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/shivam9980_NEPALI-LLM/1762652580.51522", - "retrieved_timestamp": "1762652580.5152209", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "shivam9980/NEPALI-LLM", - "developer": "shivam9980", - "inference_platform": "unknown", - "id": "shivam9980/NEPALI-LLM" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.041666112581284324 - 
} - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3828457133787513 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41219791666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2064494680851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.273 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/shuttleai/shuttleai_shuttle-3/bc357a38-215b-4885-9e0e-6f2b6f0bf1cc.json b/leaderboard_data/HFOpenLLMv2/shuttleai/shuttleai_shuttle-3/bc357a38-215b-4885-9e0e-6f2b6f0bf1cc.json deleted file mode 100644 index fd14890dd229173181c3fc4092bcd8617a368230..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/shuttleai/shuttleai_shuttle-3/bc357a38-215b-4885-9e0e-6f2b6f0bf1cc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/shuttleai_shuttle-3/1762652580.5160902", - "retrieved_timestamp": "1762652580.5160909", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "shuttleai/shuttle-3", - "developer": "shuttleai", - "inference_platform": "unknown", - "id": "shuttleai/shuttle-3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.815403130360776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7420334281529087 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45996978851963743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41191275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4376875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5716422872340425 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/shyamieee/shyamieee_Padma-v7.0/81546997-4dda-45ea-81fb-23db1b3b5cd7.json b/leaderboard_data/HFOpenLLMv2/shyamieee/shyamieee_Padma-v7.0/81546997-4dda-45ea-81fb-23db1b3b5cd7.json deleted file mode 100644 index d9ac77caf5456f4277934e278481ad434a9c550d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/shyamieee/shyamieee_Padma-v7.0/81546997-4dda-45ea-81fb-23db1b3b5cd7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/shyamieee_Padma-v7.0/1762652580.51635", - "retrieved_timestamp": "1762652580.51635", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "shyamieee/Padma-v7.0", - "developer": "shyamieee", - "inference_platform": "unknown", - "id": "shyamieee/Padma-v7.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3841097177710696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5118785631761485 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2860738255033557 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43855208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3029421542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/silma-ai/silma-ai_SILMA-9B-Instruct-v1.0/de11a0bf-47ea-444f-bf89-45e9208cfd1a.json b/leaderboard_data/HFOpenLLMv2/silma-ai/silma-ai_SILMA-9B-Instruct-v1.0/de11a0bf-47ea-444f-bf89-45e9208cfd1a.json deleted file mode 100644 index e36b434d1130d52bec3023cae4355c4805602046..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/silma-ai/silma-ai_SILMA-9B-Instruct-v1.0/de11a0bf-47ea-444f-bf89-45e9208cfd1a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/silma-ai_SILMA-9B-Instruct-v1.0/1762652580.516612", - "retrieved_timestamp": "1762652580.516613", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "silma-ai/SILMA-9B-Instruct-v1.0", - "developer": "silma-ai", - "inference_platform": "unknown", - "id": "silma-ai/SILMA-9B-Instruct-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5841943820174914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5219015032853501 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163141993957704 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46369791666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39195478723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/silma-ai/silma-ai_SILMA-Kashif-2B-Instruct-v1.0/e6926be5-561b-453b-8d5f-e64f380c4a51.json b/leaderboard_data/HFOpenLLMv2/silma-ai/silma-ai_SILMA-Kashif-2B-Instruct-v1.0/e6926be5-561b-453b-8d5f-e64f380c4a51.json deleted file mode 100644 index ae1856c0faed8ce3e3973854e40ab6b630d30dbf..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/silma-ai/silma-ai_SILMA-Kashif-2B-Instruct-v1.0/e6926be5-561b-453b-8d5f-e64f380c4a51.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/silma-ai_SILMA-Kashif-2B-Instruct-v1.0/1762652580.516862", - "retrieved_timestamp": "1762652580.5168629", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0", - "developer": "silma-ai", - "inference_platform": "unknown", - "id": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11807781131841291 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37932201246317715 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2701342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4042604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22581449468085107 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/skymizer/skymizer_Llama2-7b-sft-chat-custom-template-dpo/24473e8a-2631-44b5-9cc2-81f0669d8032.json b/leaderboard_data/HFOpenLLMv2/skymizer/skymizer_Llama2-7b-sft-chat-custom-template-dpo/24473e8a-2631-44b5-9cc2-81f0669d8032.json deleted file mode 100644 index 9946cf72be93e1990ec7fe1b62ab4b8cada26253..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/skymizer/skymizer_Llama2-7b-sft-chat-custom-template-dpo/24473e8a-2631-44b5-9cc2-81f0669d8032.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/skymizer_Llama2-7b-sft-chat-custom-template-dpo/1762652580.517826", - "retrieved_timestamp": "1762652580.517826", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "skymizer/Llama2-7b-sft-chat-custom-template-dpo", - "developer": "skymizer", - "inference_platform": "unknown", - "id": "skymizer/Llama2-7b-sft-chat-custom-template-dpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2352823840742563 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36884662302661564 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44286458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19464760638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.738 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_ChocoTrio-14B-v1/c2034822-689f-4e8b-9575-b63081584aec.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_ChocoTrio-14B-v1/c2034822-689f-4e8b-9575-b63081584aec.json deleted file mode 100644 index a052c335cd2444073b2ad24d82cc6d0ff55cb3db..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_ChocoTrio-14B-v1/c2034822-689f-4e8b-9575-b63081584aec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_ChocoTrio-14B-v1/1762652580.518315", - "retrieved_timestamp": "1762652580.518315", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/ChocoTrio-14B-v1", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/ChocoTrio-14B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7088912973133508 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6505840125855428 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3972809667673716 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3850671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4820520833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5369847074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_IF-reasoning-experiment-40/162b8329-ad84-463b-bda7-7383edda04d8.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_IF-reasoning-experiment-40/162b8329-ad84-463b-bda7-7383edda04d8.json deleted file mode 100644 index 4d70246c3750edf8d0922c0da8f4bcf81dc160fc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_IF-reasoning-experiment-40/162b8329-ad84-463b-bda7-7383edda04d8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_IF-reasoning-experiment-40/1762652580.518558", - "retrieved_timestamp": "1762652580.518559", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/IF-reasoning-experiment-40", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/IF-reasoning-experiment-40" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6329793835910938 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6111859401994667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", 
- "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3800335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5194166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5024933510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_IF-reasoning-experiment-80/b1097c42-10fe-4892-8e85-60385ecf35bf.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_IF-reasoning-experiment-80/b1097c42-10fe-4892-8e85-60385ecf35bf.json deleted file mode 100644 index 43190ce2f7dcf2c47fca6d2b137ff07e27b6aaf0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_IF-reasoning-experiment-80/b1097c42-10fe-4892-8e85-60385ecf35bf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_IF-reasoning-experiment-80/1762652580.5187662", - "retrieved_timestamp": "1762652580.518767", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/IF-reasoning-experiment-80", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/IF-reasoning-experiment-80" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5462761029623622 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42103836132239286 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28439597315436244 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5024583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3367686170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.383 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_KytheraMix-7B-v0.2/c50f0ef7-18e4-4f03-8262-ee1519c59b7f.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_KytheraMix-7B-v0.2/c50f0ef7-18e4-4f03-8262-ee1519c59b7f.json deleted file mode 100644 index ba28695d1ddd7ba07a7a4243a7f331b3b8f23272..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_KytheraMix-7B-v0.2/c50f0ef7-18e4-4f03-8262-ee1519c59b7f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_KytheraMix-7B-v0.2/1762652580.5189881", - "retrieved_timestamp": "1762652580.5189881", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/KytheraMix-7B-v0.2", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/KytheraMix-7B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6128705168951715 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5635202746804572 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29229607250755285 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33557046979865773 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45941666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45054853723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.1-experimental/aa2b9fb3-77ca-4a48-b3dd-77879220a6b8.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.1-experimental/aa2b9fb3-77ca-4a48-b3dd-77879220a6b8.json deleted file mode 100644 index b6beb75dbe5e2a56370082939fbff48a04a73b4a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.1-experimental/aa2b9fb3-77ca-4a48-b3dd-77879220a6b8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.1-experimental/1762652580.519198", - "retrieved_timestamp": "1762652580.519199", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.1-experimental", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.1-experimental" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5353850006870658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6582539239967329 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3580060422960725 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47284375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5408078457446809 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.3/6103d107-0eb8-4b0e-8947-d5c7e7cb62f6.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.3/6103d107-0eb8-4b0e-8947-d5c7e7cb62f6.json deleted file mode 100644 index c07cd309a14b41372a874c0e07b9382eb5e25370..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.3/6103d107-0eb8-4b0e-8947-d5c7e7cb62f6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.3/1762652580.519407", - "retrieved_timestamp": "1762652580.5194082", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.3", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5031616111916382 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6611400465373158 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3406344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4688125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5410571808510638 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.6-002-model_stock/bd904778-1ad9-48fe-a12e-4b62ce46bd0b.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.6-002-model_stock/bd904778-1ad9-48fe-a12e-4b62ce46bd0b.json deleted file mode 100644 index 1effe611123dfecec7031df0c0bb52f54d6fd75c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.6-002-model_stock/bd904778-1ad9-48fe-a12e-4b62ce46bd0b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.6-002-model_stock/1762652580.520087", - "retrieved_timestamp": "1762652580.520087", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open 
LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.6-002-model_stock", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.6-002-model_stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.669224324791553 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6143349188724702 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3776435045317221 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37416107382550334 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5180208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5054022606382979 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.6-model_stock/92d4d9ca-d19f-45c5-b506-5b1039100c92.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.6-model_stock/92d4d9ca-d19f-45c5-b506-5b1039100c92.json deleted file mode 100644 index 3f783a318f18e4c6c25eff5089efb13fd4d818de..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.6-model_stock/92d4d9ca-d19f-45c5-b506-5b1039100c92.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.6-model_stock/1762652580.520298", - "retrieved_timestamp": "1762652580.520299", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.6-model_stock", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.6-model_stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6789662539838739 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6269436532753222 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4244712990936556 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.50065625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519780585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.6/dd7005a5-281d-42e9-9916-663b1641718f.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.6/dd7005a5-281d-42e9-9916-663b1641718f.json deleted file mode 100644 index 4fd38ec3b1cde264421e46a992a736034d374ce8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.6/dd7005a5-281d-42e9-9916-663b1641718f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.6/1762652580.519876", - "retrieved_timestamp": "1762652580.519876", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.6", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.6" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6972510716011294 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6460312233782931 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4846875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399767287234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.7-Fusion/480b1187-5f66-4414-84b1-4c6ce1ebf137.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.7-Fusion/480b1187-5f66-4414-84b1-4c6ce1ebf137.json deleted file mode 100644 index 553de41ae953a6703aac2d2b805e0feace30e428..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.7-Fusion/480b1187-5f66-4414-84b1-4c6ce1ebf137.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.7-Fusion/1762652580.52051", - "retrieved_timestamp": "1762652580.520511", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.7-Fusion", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.7-Fusion" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6821134589555713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6543636625652262 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4040785498489426 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.401006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - 
"evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49913541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5390625 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.7-rc1/5919f71f-8d7b-4cce-a7ce-01680c08acf2.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.7-rc1/5919f71f-8d7b-4cce-a7ce-01680c08acf2.json deleted file mode 100644 index a087e58bc90227c939f92ed45dd32a0ca4c59e6e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.7-rc1/5919f71f-8d7b-4cce-a7ce-01680c08acf2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.7-rc1/1762652580.520714", - "retrieved_timestamp": "1762652580.520715", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.7-rc1", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.7-rc1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7305482785675341 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6486027992626241 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3851963746223565 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47147916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5415558510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.7-rc4/b3b9b1a5-4495-4649-9943-58986d94fcb1.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.7-rc4/b3b9b1a5-4495-4649-9943-58986d94fcb1.json deleted file mode 100644 index 13f235edbada36ea325184e94d5edabe06160981..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_Lamarck-14B-v0.7-rc4/b3b9b1a5-4495-4649-9943-58986d94fcb1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.7-rc4/1762652580.520921", - "retrieved_timestamp": "1762652580.5209222", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/Lamarck-14B-v0.7-rc4", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/Lamarck-14B-v0.7-rc4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7210811757248545 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6509652911243554 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4025679758308157 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38926174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4911979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5399767287234043 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_LamarckInfusion-14B-v1/e7577048-db59-4629-aeb0-f50b72cbb827.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_LamarckInfusion-14B-v1/e7577048-db59-4629-aeb0-f50b72cbb827.json deleted file mode 100644 index b476acae152eb979bb7abec0a92d6c282f04d73b..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_LamarckInfusion-14B-v1/e7577048-db59-4629-aeb0-f50b72cbb827.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v1/1762652580.521131", - "retrieved_timestamp": "1762652580.521132", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/LamarckInfusion-14B-v1", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/LamarckInfusion-14B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7198322672730577 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6539252513912222 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4169184290030212 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39093959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48989583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5376496010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_LamarckInfusion-14B-v2-hi/e4b943ea-3e97-490b-af6d-ad7dc0fdf012.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_LamarckInfusion-14B-v2-hi/e4b943ea-3e97-490b-af6d-ad7dc0fdf012.json deleted file mode 100644 index eff1cb3c7ff4bbc4a180de87d55e2465edc2d056..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_LamarckInfusion-14B-v2-hi/e4b943ea-3e97-490b-af6d-ad7dc0fdf012.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v2-hi/1762652580.521555", - "retrieved_timestamp": "1762652580.521556", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF 
Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/LamarckInfusion-14B-v2-hi", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/LamarckInfusion-14B-v2-hi" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.685485622592499 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6555026541798943 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3884228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48471875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5404753989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_LamarckInfusion-14B-v2-lo/57084771-cc66-485c-99ca-470556e14c1b.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_LamarckInfusion-14B-v2-lo/57084771-cc66-485c-99ca-470556e14c1b.json deleted file mode 100644 index 63bcf2feb05ef9958ea3e1c8c383867f3367b29f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_LamarckInfusion-14B-v2-lo/57084771-cc66-485c-99ca-470556e14c1b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v2-lo/1762652580.52177", - "retrieved_timestamp": "1762652580.521771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/LamarckInfusion-14B-v2-lo", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/LamarckInfusion-14B-v2-lo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy 
on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6787911630030541 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6528441920403686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42371601208459214 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3859060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4991041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5397273936170213 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_LamarckInfusion-14B-v2/95f82b68-6135-4d7d-a2f8-b589d4041776.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_LamarckInfusion-14B-v2/95f82b68-6135-4d7d-a2f8-b589d4041776.json deleted file mode 100644 index 607c65c4456344c0d817d0a5d9ab65fdf70ae127..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_LamarckInfusion-14B-v2/95f82b68-6135-4d7d-a2f8-b589d4041776.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v2/1762652580.521342", - "retrieved_timestamp": "1762652580.521342", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/LamarckInfusion-14B-v2", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/LamarckInfusion-14B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6811892445378263 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6564434429766982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.438821752265861 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3875838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4992604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5416389627659575 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_LamarckInfusion-14B-v3/8fe84e89-c582-44d0-b961-d6ed4d889193.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_LamarckInfusion-14B-v3/8fe84e89-c582-44d0-b961-d6ed4d889193.json deleted file mode 100644 index 0fa1b701b02ed2f528b926e1a8c61f71a47a6999..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_LamarckInfusion-14B-v3/8fe84e89-c582-44d0-b961-d6ed4d889193.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_LamarckInfusion-14B-v3/1762652580.5219798", - "retrieved_timestamp": "1762652580.5219798", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/LamarckInfusion-14B-v3", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/LamarckInfusion-14B-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7131378076836128 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6517667892516962 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4123867069486405 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38674496644295303 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48202083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5407247340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_lamarck-14b-prose-model_stock/3191b3a3-761a-42b4-bd31-b8dc22a4c722.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_lamarck-14b-prose-model_stock/3191b3a3-761a-42b4-bd31-b8dc22a4c722.json deleted file mode 100644 index 99762f3b3fe5c5e0deaaf7f101535c14d4e58f5b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_lamarck-14b-prose-model_stock/3191b3a3-761a-42b4-bd31-b8dc22a4c722.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_lamarck-14b-prose-model_stock/1762652580.5312169", - "retrieved_timestamp": "1762652580.5312169", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/lamarck-14b-prose-model_stock", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/lamarck-14b-prose-model_stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4276486389446668 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6487621585665343 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3934563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48459375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.535405585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_lamarck-14b-reason-model_stock/ee7d14c9-aa49-49df-99fc-057e7dae251f.json b/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_lamarck-14b-reason-model_stock/ee7d14c9-aa49-49df-99fc-057e7dae251f.json deleted file mode 100644 index d1a1e57a8f98270023422b14ae99270bf8763850..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sometimesanotion/sometimesanotion_lamarck-14b-reason-model_stock/ee7d14c9-aa49-49df-99fc-057e7dae251f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sometimesanotion_lamarck-14b-reason-model_stock/1762652580.531434", - "retrieved_timestamp": "1762652580.531434", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sometimesanotion/lamarck-14b-reason-model_stock", - "developer": "sometimesanotion", - "inference_platform": "unknown", - "id": "sometimesanotion/lamarck-14b-reason-model_stock" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49646715160219335 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6568898541408251 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3580060422960725 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38422818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47408333333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5402260638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/7aa22e01-efb1-46f3-aad6-cc1fcb2c3783.json 
b/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/7aa22e01-efb1-46f3-aad6-cc1fcb2c3783.json deleted file mode 100644 index 3ac2c301caf2cf9a71a8b11d1db32c89ffdd3f6c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/7aa22e01-efb1-46f3-aad6-cc1fcb2c3783.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415/1762652580.531641", - "retrieved_timestamp": "1762652580.5316422", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415", - "developer": "sonthenguyen", - "inference_platform": "unknown", - "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28933784580468713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38041816886828617 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24664429530201343 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3860625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14012632978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 7.723 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/c9e9de59-9ec8-4ca9-8869-f77cac14f3ed.json b/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/c9e9de59-9ec8-4ca9-8869-f77cac14f3ed.json deleted file mode 100644 index d3eb4f0a4c6125122bc04ad86bc4817cc596b57c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/c9e9de59-9ec8-4ca9-8869-f77cac14f3ed.json +++ /dev/null @@ 
-1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205/1762652580.531905", - "retrieved_timestamp": "1762652580.5319061", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205", - "developer": "sonthenguyen", - "inference_platform": "unknown", - "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3199377651298555 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39586243698929185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4271770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21243351063829788 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 7.723 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1e66ee5b-d3e7-4e2e-8a6f-d098938d4afd.json b/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1e66ee5b-d3e7-4e2e-8a6f-d098938d4afd.json deleted file mode 100644 index 9c6c41941381c6b5511d9cd1177e5769ea9f30f6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1e66ee5b-d3e7-4e2e-8a6f-d098938d4afd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sonthenguyen_ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522/1762652580.532109", - "retrieved_timestamp": "1762652580.53211", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522", - "developer": "sonthenguyen", - "inference_platform": "unknown", - "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37644117607946914 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3828367247244511 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2651006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4404166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20553523936170212 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 7.723 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbc-213steps/aabf8b57-c3fd-494b-b8e3-7ff1bdb0a15b.json b/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbc-213steps/aabf8b57-c3fd-494b-b8e3-7ff1bdb0a15b.json deleted file mode 100644 index 8cbf1a75ebb329aa78d488ee48cee0c4b62c7baa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbc-213steps/aabf8b57-c3fd-494b-b8e3-7ff1bdb0a15b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbc-213steps/1762652580.532313", - "retrieved_timestamp": "1762652580.532314", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps", - "developer": "sonthenguyen", - "inference_platform": "unknown", - "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": 
"Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4275489035758454 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4197290890050172 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0256797583081571 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40863541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27086103723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbo-180steps/dd216882-a64e-4a0e-8fdc-ff5f99639566.json b/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbo-180steps/dd216882-a64e-4a0e-8fdc-ff5f99639566.json deleted file mode 100644 index f2af689da52bbb6e4b4b768eeb4abe63a72e9574..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbo-180steps/dd216882-a64e-4a0e-8fdc-ff5f99639566.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbo-180steps/1762652580.532533", - "retrieved_timestamp": "1762652580.5325341", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps", - "developer": "sonthenguyen", - "inference_platform": "unknown", - "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40871443325930756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4322585223071556 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38851041666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27476728723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbr-180steps/9fa1bbeb-ec5c-4d53-b2f3-eefa660bee5e.json b/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbr-180steps/9fa1bbeb-ec5c-4d53-b2f3-eefa660bee5e.json deleted file mode 100644 index a3a6e80cac7cafc12ffed4d213de30bf05410685..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sonthenguyen/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbr-180steps/9fa1bbeb-ec5c-4d53-b2f3-eefa660bee5e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sonthenguyen_zephyr-sft-bnb-4bit-DPO-mtbr-180steps/1762652580.5327501", - "retrieved_timestamp": "1762652580.532751", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps", - "developer": "sonthenguyen", - "inference_platform": "unknown", - "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4032190144372487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43053552565190517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.2802013422818792 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42575 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2711103723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sophosympatheia/sophosympatheia_Midnight-Miqu-70B-v1.5/3498b101-b86e-4968-abca-a3d3d42a4e5b.json b/leaderboard_data/HFOpenLLMv2/sophosympatheia/sophosympatheia_Midnight-Miqu-70B-v1.5/3498b101-b86e-4968-abca-a3d3d42a4e5b.json deleted file mode 100644 index 935f2a9d8d5fd000920730d770009ef7cda63c50..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sophosympatheia/sophosympatheia_Midnight-Miqu-70B-v1.5/3498b101-b86e-4968-abca-a3d3d42a4e5b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sophosympatheia_Midnight-Miqu-70B-v1.5/1762652580.532959", - "retrieved_timestamp": "1762652580.53296", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sophosympatheia/Midnight-Miqu-70B-v1.5", - "developer": "sophosympatheia", - "inference_platform": "unknown", - "id": "sophosympatheia/Midnight-Miqu-70B-v1.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6118465671086051 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5606228371685053 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42441666666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.38248005319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 68.977 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/speakleash/speakleash_Bielik-11B-v2.0-Instruct/4aaff24b-0364-4cc9-9680-5f5c6d04128b.json b/leaderboard_data/HFOpenLLMv2/speakleash/speakleash_Bielik-11B-v2.0-Instruct/4aaff24b-0364-4cc9-9680-5f5c6d04128b.json deleted file mode 100644 index 68794bbd002cc8b3144b8cf1c03dda8a277bf846..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/speakleash/speakleash_Bielik-11B-v2.0-Instruct/4aaff24b-0364-4cc9-9680-5f5c6d04128b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.0-Instruct/1762652580.533494", - "retrieved_timestamp": "1762652580.533494", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "speakleash/Bielik-11B-v2.0-Instruct", - "developer": "speakleash", - "inference_platform": "unknown", - "id": "speakleash/Bielik-11B-v2.0-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5252430218486948 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5361579931173499 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11858006042296072 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31711409395973156 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4467083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351063829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 11.169 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/speakleash/speakleash_Bielik-11B-v2.1-Instruct/834e5703-00f3-47d6-817f-cf039c53d915.json b/leaderboard_data/HFOpenLLMv2/speakleash/speakleash_Bielik-11B-v2.1-Instruct/834e5703-00f3-47d6-817f-cf039c53d915.json deleted file mode 100644 index 
2aaaf60e67ef336893efcc33c5fc07182ba4bd74..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/speakleash/speakleash_Bielik-11B-v2.1-Instruct/834e5703-00f3-47d6-817f-cf039c53d915.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.1-Instruct/1762652580.533698", - "retrieved_timestamp": "1762652580.533698", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "speakleash/Bielik-11B-v2.1-Instruct", - "developer": "speakleash", - "inference_platform": "unknown", - "id": "speakleash/Bielik-11B-v2.1-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5089817240477489 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5530119844151298 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26661631419939574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4185208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34466422872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 11.169 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/speakleash/speakleash_Bielik-11B-v2.2-Instruct/70c377ab-41b4-4c30-ade6-65cc52ab916a.json b/leaderboard_data/HFOpenLLMv2/speakleash/speakleash_Bielik-11B-v2.2-Instruct/70c377ab-41b4-4c30-ade6-65cc52ab916a.json deleted file mode 100644 index 12bb164c1e6ab093e13966611f572eee17fde81f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/speakleash/speakleash_Bielik-11B-v2.2-Instruct/70c377ab-41b4-4c30-ade6-65cc52ab916a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.2-Instruct/1762652580.533901", - "retrieved_timestamp": "1762652580.5339022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "speakleash/Bielik-11B-v2.2-Instruct", - "developer": "speakleash", - "inference_platform": "unknown", - "id": "speakleash/Bielik-11B-v2.2-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5551935531057595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5596561190863629 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2681268882175227 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41712499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3486535904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 11.169 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/speakleash/speakleash_Bielik-11B-v2.3-Instruct/822b7413-b84e-4df0-8aca-cc0e95283a86.json b/leaderboard_data/HFOpenLLMv2/speakleash/speakleash_Bielik-11B-v2.3-Instruct/822b7413-b84e-4df0-8aca-cc0e95283a86.json deleted file mode 100644 index ab7277428ee5b305cb295c98658e7289753eb741..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/speakleash/speakleash_Bielik-11B-v2.3-Instruct/822b7413-b84e-4df0-8aca-cc0e95283a86.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2.3-Instruct/1762652580.534104", - "retrieved_timestamp": "1762652580.534104", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "speakleash/Bielik-11B-v2.3-Instruct", - "developer": "speakleash", - "inference_platform": "unknown", - "id": "speakleash/Bielik-11B-v2.3-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.558290890393046 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5662699020280031 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34060402684563756 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4518229166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34441489361702127 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 11.169 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/speakleash/speakleash_Bielik-11B-v2/680f5fa0-fb15-4687-a40b-7807af2e0fe5.json b/leaderboard_data/HFOpenLLMv2/speakleash/speakleash_Bielik-11B-v2/680f5fa0-fb15-4687-a40b-7807af2e0fe5.json deleted file mode 100644 index 7920c362735cb64bdb4fb019518b04ce4617e803..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/speakleash/speakleash_Bielik-11B-v2/680f5fa0-fb15-4687-a40b-7807af2e0fe5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/speakleash_Bielik-11B-v2/1762652580.533211", - "retrieved_timestamp": "1762652580.533211", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "speakleash/Bielik-11B-v2", - "developer": "speakleash", - "inference_platform": "unknown", - "id": "speakleash/Bielik-11B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23810489501190177 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49308409091594996 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { 
- "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39244791666666673 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3137466755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 11.169 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/spow12/spow12_ChatWaifu_12B_v2.0/f9798139-bc7d-49e7-bc42-bcd0ee808c68.json b/leaderboard_data/HFOpenLLMv2/spow12/spow12_ChatWaifu_12B_v2.0/f9798139-bc7d-49e7-bc42-bcd0ee808c68.json deleted file mode 100644 index 12ee4d851583a9c6276157129a200f591df1417e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/spow12/spow12_ChatWaifu_12B_v2.0/f9798139-bc7d-49e7-bc42-bcd0ee808c68.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_12B_v2.0/1762652580.534569", - "retrieved_timestamp": "1762652580.53457", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "spow12/ChatWaifu_12B_v2.0", - "developer": "spow12", - "inference_platform": "unknown", - "id": "spow12/ChatWaifu_12B_v2.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47675833455232114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5207681738205238 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07099697885196375 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44317708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33876329787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/spow12/spow12_ChatWaifu_22B_v2.0_preview/d0e259de-1261-4d31-a1d4-4689112deca0.json b/leaderboard_data/HFOpenLLMv2/spow12/spow12_ChatWaifu_22B_v2.0_preview/d0e259de-1261-4d31-a1d4-4689112deca0.json deleted file mode 100644 index 98a0bfd1fd65b26e7145736eefe9b03930e8925e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/spow12/spow12_ChatWaifu_22B_v2.0_preview/d0e259de-1261-4d31-a1d4-4689112deca0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_22B_v2.0_preview/1762652580.534824", - "retrieved_timestamp": "1762652580.5348248", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "spow12/ChatWaifu_22B_v2.0_preview", - "developer": "spow12", - "inference_platform": "unknown", - "id": "spow12/ChatWaifu_22B_v2.0_preview" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6744947849483814 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6170153091362338 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18882175226586104 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31543624161073824 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3685416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39876994680851063 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/spow12/spow12_ChatWaifu_v1.4/ac56cc08-585f-4930-959d-7cbad08c34b0.json b/leaderboard_data/HFOpenLLMv2/spow12/spow12_ChatWaifu_v1.4/ac56cc08-585f-4930-959d-7cbad08c34b0.json 
deleted file mode 100644 index b783fbe665ea0432da141b2454d1a7e419daaae0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/spow12/spow12_ChatWaifu_v1.4/ac56cc08-585f-4930-959d-7cbad08c34b0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_v1.4/1762652580.535029", - "retrieved_timestamp": "1762652580.5350301", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "spow12/ChatWaifu_v1.4", - "developer": "spow12", - "inference_platform": "unknown", - "id": "spow12/ChatWaifu_v1.4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5690567693719332 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5176247229970669 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47433333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474900265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/spow12/spow12_ChatWaifu_v2.0_22B/7698fd4d-b2d8-4ba9-98be-d96f9c666b2f.json b/leaderboard_data/HFOpenLLMv2/spow12/spow12_ChatWaifu_v2.0_22B/7698fd4d-b2d8-4ba9-98be-d96f9c666b2f.json deleted file mode 100644 index 63c9e5e4b95238bd2aa6959174aa52f2b3f54cdb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/spow12/spow12_ChatWaifu_v2.0_22B/7698fd4d-b2d8-4ba9-98be-d96f9c666b2f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_v2.0_22B/1762652580.535436", - "retrieved_timestamp": "1762652580.535437", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - 
"source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "spow12/ChatWaifu_v2.0_22B", - "developer": "spow12", - "inference_platform": "unknown", - "id": "spow12/ChatWaifu_v2.0_22B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6517384982956334 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5908050619550995 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20317220543806647 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3841979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3812333776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/spow12/spow12_ChatWaifu_v2.0_22B/cccb45b5-c5cb-43c0-be27-bacbb4db5c5b.json b/leaderboard_data/HFOpenLLMv2/spow12/spow12_ChatWaifu_v2.0_22B/cccb45b5-c5cb-43c0-be27-bacbb4db5c5b.json deleted file mode 100644 index 45e00bcca6d2ea939fc9a15e410ba55836a5b1e4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/spow12/spow12_ChatWaifu_v2.0_22B/cccb45b5-c5cb-43c0-be27-bacbb4db5c5b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/spow12_ChatWaifu_v2.0_22B/1762652580.5352252", - "retrieved_timestamp": "1762652580.535226", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "spow12/ChatWaifu_v2.0_22B", - "developer": "spow12", - "inference_platform": "unknown", - "id": "spow12/ChatWaifu_v2.0_22B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6510891102275296 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.592630190761292 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18580060422960726 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32466442953020136 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3841979166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3835605053191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 22.247 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ssmits/ssmits_Qwen2.5-95B-Instruct/1c441afa-b8ac-4ff9-b881-e75f8765dd8e.json b/leaderboard_data/HFOpenLLMv2/ssmits/ssmits_Qwen2.5-95B-Instruct/1c441afa-b8ac-4ff9-b881-e75f8765dd8e.json deleted file mode 100644 index 907a994b055974762f04b1656316f2def642a6d7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ssmits/ssmits_Qwen2.5-95B-Instruct/1c441afa-b8ac-4ff9-b881-e75f8765dd8e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ssmits_Qwen2.5-95B-Instruct/1762652580.535626", - "retrieved_timestamp": "1762652580.5356271", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ssmits/Qwen2.5-95B-Instruct", - "developer": "ssmits", - "inference_platform": "unknown", - "id": "ssmits/Qwen2.5-95B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8431051831363006 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7037799697488242 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5302114803625377 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3640939597315436 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4283854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5216921542553191 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 94.648 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_StableBeluga2/ca7ae45f-833a-4ce2-9fb7-27601e9434c8.json b/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_StableBeluga2/ca7ae45f-833a-4ce2-9fb7-27601e9434c8.json deleted file mode 100644 index d16ac4851954d9c02b6e5e687d7ddda5b2150ad9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_StableBeluga2/ca7ae45f-833a-4ce2-9fb7-27601e9434c8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/stabilityai_StableBeluga2/1762652580.535889", - "retrieved_timestamp": "1762652580.5358899", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "stabilityai/StableBeluga2", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/StableBeluga2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37871403431783224 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5824128134553807 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04380664652567976 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3162751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47296875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.3326130319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 68.977 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-2-12b-chat/22aad948-bcc7-4f8f-bb42-a839e3d1be96.json b/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-2-12b-chat/22aad948-bcc7-4f8f-bb42-a839e3d1be96.json deleted file mode 100644 index b05d1b5970c84313521ce67240aec0f72fcc0412..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-2-12b-chat/22aad948-bcc7-4f8f-bb42-a839e3d1be96.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-12b-chat/1762652580.536706", - "retrieved_timestamp": "1762652580.5367072", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "stabilityai/stablelm-2-12b-chat", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/stablelm-2-12b-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4081647805600252 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4672024731282805 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05362537764350453 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2734375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "StableLmForCausalLM", - "params_billions": 12.143 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-2-12b/21f9d0a5-3ed3-40de-a233-a45f68d669e0.json b/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-2-12b/21f9d0a5-3ed3-40de-a233-a45f68d669e0.json deleted file mode 100644 index d1d54894fc6b256d76f2f409f6e0e246a46b1f04..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-2-12b/21f9d0a5-3ed3-40de-a233-a45f68d669e0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-12b/1762652580.536407", - "retrieved_timestamp": "1762652580.536408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "stabilityai/stablelm-2-12b", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/stablelm-2-12b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1569214129620518 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4508654171114765 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2785234899328859 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44788541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3071808510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "StableLmForCausalLM", - "params_billions": 12.143 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-2-1_6b-chat/552dc523-3082-4980-a533-ad5d48f1260a.json b/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-2-1_6b-chat/552dc523-3082-4980-a533-ad5d48f1260a.json deleted file mode 100644 index df070cbf7d181c1814ef268e72d24ed1ad97b332..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-2-1_6b-chat/552dc523-3082-4980-a533-ad5d48f1260a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-1_6b-chat/1762652580.5372329", - "retrieved_timestamp": "1762652580.5372338", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - 
"source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "stabilityai/stablelm-2-1_6b-chat", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/stablelm-2-1_6b-chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30599919325168334 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3390172395486522 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.024924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35796875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16215093085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "StableLmForCausalLM", - "params_billions": 1.645 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-2-1_6b/78db2373-3fcf-468b-8c87-21db03b2fdda.json b/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-2-1_6b/78db2373-3fcf-468b-8c87-21db03b2fdda.json deleted file mode 100644 index 8939f8e67bcbc9ccefc998bf677ff364e82471bd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-2-1_6b/78db2373-3fcf-468b-8c87-21db03b2fdda.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-1_6b/1762652580.5369868", - "retrieved_timestamp": "1762652580.536989", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "stabilityai/stablelm-2-1_6b", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/stablelm-2-1_6b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11570521771122844 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.338457720511071 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38819791666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1463597074468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "StableLmForCausalLM", - "params_billions": 1.645 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-2-zephyr-1_6b/96179bdf-3e1a-47ee-9fc2-ac0b23307556.json b/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-2-zephyr-1_6b/96179bdf-3e1a-47ee-9fc2-ac0b23307556.json deleted file mode 100644 index 884f6ab74b06545e720c510ecd9900204a8a5c2a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-2-zephyr-1_6b/96179bdf-3e1a-47ee-9fc2-ac0b23307556.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-2-zephyr-1_6b/1762652580.537471", - "retrieved_timestamp": "1762652580.537472", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "stabilityai/stablelm-2-zephyr-1_6b", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/stablelm-2-zephyr-1_6b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32793100085550786 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3351608706280727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - 
"evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3511458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17137632978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "StableLmForCausalLM", - "params_billions": 1.645 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-3b-4e1t/3280f4cf-dbb7-46ad-a64c-d4e3c4a58e50.json b/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-3b-4e1t/3280f4cf-dbb7-46ad-a64c-d4e3c4a58e50.json deleted file mode 100644 index 397312a11d46281f76600d1b01870d048065157b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-3b-4e1t/3280f4cf-dbb7-46ad-a64c-d4e3c4a58e50.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-3b-4e1t/1762652580.5377111", - "retrieved_timestamp": "1762652580.537712", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "stabilityai/stablelm-3b-4e1t", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/stablelm-3b-4e1t" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22031986240951784 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3504211415826912 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23741610738255034 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37778124999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1668882978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "StableLmForCausalLM", - "params_billions": 2.795 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-zephyr-3b/94960f86-3898-4add-8590-8abeff66a987.json b/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-zephyr-3b/94960f86-3898-4add-8590-8abeff66a987.json deleted file mode 100644 index 8b49ac6acd829e56f02304fde135f3520cd6802a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/stabilityai/stabilityai_stablelm-zephyr-3b/94960f86-3898-4add-8590-8abeff66a987.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/stabilityai_stablelm-zephyr-3b/1762652580.537945", - "retrieved_timestamp": "1762652580.5379462", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "stabilityai/stablelm-zephyr-3b", - "developer": "stabilityai", - "inference_platform": "unknown", - "id": "stabilityai/stablelm-zephyr-3b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36832271705740766 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3866361442837871 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23909395973154363 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4183020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17677859042553193 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "StableLmForCausalLM", - "params_billions": 2.795 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sthenno-com/sthenno-com_miscii-14b-0130/40a09314-bb43-41ff-a36a-b39064c37add.json b/leaderboard_data/HFOpenLLMv2/sthenno-com/sthenno-com_miscii-14b-0130/40a09314-bb43-41ff-a36a-b39064c37add.json deleted file mode 100644 index 
d9ee337aba8e3f030bfc2079c6ea11bec0a91ff0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sthenno-com/sthenno-com_miscii-14b-0130/40a09314-bb43-41ff-a36a-b39064c37add.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-0130/1762652580.540879", - "retrieved_timestamp": "1762652580.54088", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sthenno-com/miscii-14b-0130", - "developer": "sthenno-com", - "inference_platform": "unknown", - "id": "sthenno-com/miscii-14b-0130" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6647029880716498 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6505409113818335 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43202416918429004 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38171140939597314 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4911666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5363198138297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sthenno-com/sthenno-com_miscii-14b-0218/f73b09b4-020d-49fd-8ede-6a690088be94.json b/leaderboard_data/HFOpenLLMv2/sthenno-com/sthenno-com_miscii-14b-0218/f73b09b4-020d-49fd-8ede-6a690088be94.json deleted file mode 100644 index 49e44adec9a1ebd80af82e67f3ec5cf59e5e3e84..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sthenno-com/sthenno-com_miscii-14b-0218/f73b09b4-020d-49fd-8ede-6a690088be94.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-0218/1762652580.541173", - "retrieved_timestamp": "1762652580.541174", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sthenno-com/miscii-14b-0218", - "developer": "sthenno-com", - "inference_platform": "unknown", - "id": "sthenno-com/miscii-14b-0218" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7655941790006073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6558708629267258 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5143504531722054 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38338926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4272708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5297539893617021 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sthenno-com/sthenno-com_miscii-14b-1028/3f2549af-9bc5-4ad1-a429-79bbb91c929f.json b/leaderboard_data/HFOpenLLMv2/sthenno-com/sthenno-com_miscii-14b-1028/3f2549af-9bc5-4ad1-a429-79bbb91c929f.json deleted file mode 100644 index 87c6d1f58dc9094236230967abfaa5c55153b534..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sthenno-com/sthenno-com_miscii-14b-1028/3f2549af-9bc5-4ad1-a429-79bbb91c929f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-1028/1762652580.541399", - "retrieved_timestamp": "1762652580.5414", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sthenno-com/miscii-14b-1028", - "developer": "sthenno-com", - "inference_platform": "unknown", - "id": "sthenno-com/miscii-14b-1028" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8236711924360696 - } - }, - { - 
"evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.64483340535341 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5030211480362538 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41815625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5152925531914894 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sthenno-com/sthenno-com_miscii-14b-1225/ab816ab5-9edb-49d1-8f89-c3dc36a8a0de.json b/leaderboard_data/HFOpenLLMv2/sthenno-com/sthenno-com_miscii-14b-1225/ab816ab5-9edb-49d1-8f89-c3dc36a8a0de.json deleted file mode 100644 index 4e3b240dffd9cda2f7139c88f988e3ba19480071..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sthenno-com/sthenno-com_miscii-14b-1225/ab816ab5-9edb-49d1-8f89-c3dc36a8a0de.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sthenno-com_miscii-14b-1225/1762652580.541638", - "retrieved_timestamp": "1762652580.5416389", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sthenno-com/miscii-14b-1225", - "developer": "sthenno-com", - "inference_platform": "unknown", - "id": "sthenno-com/miscii-14b-1225" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.787800812954073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6571708988407374 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": 
"Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3775167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4365729166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5271775265957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-0120/9285700f-106e-481d-88bc-5d59b5d57377.json b/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-0120/9285700f-106e-481d-88bc-5d59b5d57377.json deleted file mode 100644 index 7ef4a1ea8a43cd484cf3ca874406cdf9161a7ea5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-0120/9285700f-106e-481d-88bc-5d59b5d57377.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-0120/1762652580.538178", - "retrieved_timestamp": "1762652580.5381792", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sthenno/tempesthenno-0120", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-0120" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5390319906736348 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6373174111347703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33534743202416917 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39429530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46332291666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5290059840425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-fusion-0309/97793808-7d23-4ec7-b1dd-0c7b1dea1c3c.json b/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-fusion-0309/97793808-7d23-4ec7-b1dd-0c7b1dea1c3c.json deleted file mode 100644 index 9040a89fe0862b15e59007b38cd2bb4226c22608..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-fusion-0309/97793808-7d23-4ec7-b1dd-0c7b1dea1c3c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-fusion-0309/1762652580.538481", - "retrieved_timestamp": "1762652580.538483", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sthenno/tempesthenno-fusion-0309", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-fusion-0309" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7691913013027656 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6580880569586895 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47658610271903323 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5258477393617021 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-kto-0205-ckpt80/689a346d-191e-4ec1-93b5-6f64c1a293ff.json b/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-kto-0205-ckpt80/689a346d-191e-4ec1-93b5-6f64c1a293ff.json deleted file mode 100644 index 
8124f6b2abdb1eba2a927df4f7b40d64f69b68b8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-kto-0205-ckpt80/689a346d-191e-4ec1-93b5-6f64c1a293ff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-kto-0205-ckpt80/1762652580.5387661", - "retrieved_timestamp": "1762652580.538767", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sthenno/tempesthenno-kto-0205-ckpt80", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-kto-0205-ckpt80" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8054362425032248 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.654273895095419 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34815436241610737 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4247604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5285904255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-nuslerp-001/1d12c40a-a9b5-483b-aaac-07e323de73a9.json b/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-nuslerp-001/1d12c40a-a9b5-483b-aaac-07e323de73a9.json deleted file mode 100644 index cbfb09098d056a10b7130462f62857e97a9fe89e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-nuslerp-001/1d12c40a-a9b5-483b-aaac-07e323de73a9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-nuslerp-001/1762652580.5390232", - "retrieved_timestamp": "1762652580.5390239", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM 
v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sthenno/tempesthenno-nuslerp-001", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-nuslerp-001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7926468437080281 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6577675676172494 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47583081570996977 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5256815159574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-nuslerp-0124/b814d738-b9f3-42df-8774-0708d456c2ea.json b/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-nuslerp-0124/b814d738-b9f3-42df-8774-0708d456c2ea.json deleted file mode 100644 index 67d6c3139fd3ddb0cb07ccde8ff88e3150517d62..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-nuslerp-0124/b814d738-b9f3-42df-8774-0708d456c2ea.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-nuslerp-0124/1762652580.539254", - "retrieved_timestamp": "1762652580.5392551", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sthenno/tempesthenno-nuslerp-0124", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-nuslerp-0124" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.7003982765728267 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6468547741903091 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.411631419939577 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3901006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48592708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5352393617021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-ppo-ckpt40/7c2e9776-92e4-457b-ae08-32c3e351b8e1.json b/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-ppo-ckpt40/7c2e9776-92e4-457b-ae08-32c3e351b8e1.json deleted file mode 100644 index 81f0aa71f5958d3dd0d265c83dd5f254336ac2bf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-ppo-ckpt40/7c2e9776-92e4-457b-ae08-32c3e351b8e1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-ppo-ckpt40/1762652580.539634", - "retrieved_timestamp": "1762652580.539635", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sthenno/tempesthenno-ppo-ckpt40", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-ppo-ckpt40" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7923221496739761 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6549600322869433 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4735649546827795 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3775167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4351770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5291722074468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-sft-0309-ckpt10/65f19ffe-7428-41e5-a52d-02fad8e595c0.json b/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-sft-0309-ckpt10/65f19ffe-7428-41e5-a52d-02fad8e595c0.json deleted file mode 100644 index 865ca0a180d07cbdedf302453b1776641283b5af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-sft-0309-ckpt10/65f19ffe-7428-41e5-a52d-02fad8e595c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-sft-0309-ckpt10/1762652580.539892", - "retrieved_timestamp": "1762652580.539893", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sthenno/tempesthenno-sft-0309-ckpt10", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-sft-0309-ckpt10" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7743620260907724 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6551647758995857 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47205438066465255 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716442953020134 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4364166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5257646276595744 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-sft-0314-stage1-ckpt50/07d2cbaf-fa54-4d0b-bdb7-4179b5f3bebe.json b/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-sft-0314-stage1-ckpt50/07d2cbaf-fa54-4d0b-bdb7-4179b5f3bebe.json deleted file mode 100644 index 916ac5c5c7c2a03a490405cafcfa85db71083deb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempesthenno-sft-0314-stage1-ckpt50/07d2cbaf-fa54-4d0b-bdb7-4179b5f3bebe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sthenno_tempesthenno-sft-0314-stage1-ckpt50/1762652580.540305", - "retrieved_timestamp": "1762652580.540307", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sthenno/tempesthenno-sft-0314-stage1-ckpt50", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempesthenno-sft-0314-stage1-ckpt50" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7393659933421101 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6601015847983588 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46827794561933533 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3733221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44286458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5301695478723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempestissimo-14b-0309/eab26e25-e8bd-4c19-8f14-a933506372c6.json 
b/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempestissimo-14b-0309/eab26e25-e8bd-4c19-8f14-a933506372c6.json deleted file mode 100644 index 926380e5df5e69a8289781fbd964715bd83ee142..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sthenno/sthenno_tempestissimo-14b-0309/eab26e25-e8bd-4c19-8f14-a933506372c6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sthenno_tempestissimo-14b-0309/1762652580.540641", - "retrieved_timestamp": "1762652580.540643", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sthenno/tempestissimo-14b-0309", - "developer": "sthenno", - "inference_platform": "unknown", - "id": "sthenno/tempestissimo-14b-0309" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7548781677061308 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6587329699954757 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.479607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36661073825503354 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43123958333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.528091755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/streamerbtw1002/streamerbtw1002_Nexuim-R1-7B-Instruct/3e78ef29-f546-41b0-af2b-f3ae4154e396.json b/leaderboard_data/HFOpenLLMv2/streamerbtw1002/streamerbtw1002_Nexuim-R1-7B-Instruct/3e78ef29-f546-41b0-af2b-f3ae4154e396.json deleted file mode 100644 index 3118d17ae56ad49920193949ec4fd06686d3f09f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/streamerbtw1002/streamerbtw1002_Nexuim-R1-7B-Instruct/3e78ef29-f546-41b0-af2b-f3ae4154e396.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/streamerbtw1002_Nexuim-R1-7B-Instruct/1762652580.541884", - "retrieved_timestamp": "1762652580.541885", - "source_data": [ 
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "streamerbtw1002/Nexuim-R1-7B-Instruct", - "developer": "streamerbtw1002", - "inference_platform": "unknown", - "id": "streamerbtw1002/Nexuim-R1-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6934289906337407 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5175174748142363 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44561933534743203 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33555208333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.413813164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/stupidity-ai/stupidity-ai_Llama-3-8B-Instruct-MultiMoose/2f177d4b-50fb-4a87-a157-84d1094d3971.json b/leaderboard_data/HFOpenLLMv2/stupidity-ai/stupidity-ai_Llama-3-8B-Instruct-MultiMoose/2f177d4b-50fb-4a87-a157-84d1094d3971.json deleted file mode 100644 index 684fa899ea328251834fb46f5a40e0eee4521cc3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/stupidity-ai/stupidity-ai_Llama-3-8B-Instruct-MultiMoose/2f177d4b-50fb-4a87-a157-84d1094d3971.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/stupidity-ai_Llama-3-8B-Instruct-MultiMoose/1762652580.5421681", - "retrieved_timestamp": "1762652580.542169", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "stupidity-ai/Llama-3-8B-Instruct-MultiMoose", - "developer": "stupidity-ai", - "inference_platform": "unknown", - "id": 
"stupidity-ai/Llama-3-8B-Instruct-MultiMoose" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23181048506850713 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2822965317600308 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3485416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.109375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Clarus-7B-v0.1/b1070a2a-7694-472d-84a4-f20f4cfe1b88.json b/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Clarus-7B-v0.1/b1070a2a-7694-472d-84a4-f20f4cfe1b88.json deleted file mode 100644 index 13b8dc9dba20ad0ec5133dcbb2eea7da1d123b9f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Clarus-7B-v0.1/b1070a2a-7694-472d-84a4-f20f4cfe1b88.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/suayptalha_Clarus-7B-v0.1/1762652580.542475", - "retrieved_timestamp": "1762652580.5424771", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "suayptalha/Clarus-7B-v0.1", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/Clarus-7B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7454110648634512 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5496611433440965 - } - }, - { - "evaluation_name": "MATH Level 5", - 
"metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44295833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4387466755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Clarus-7B-v0.2/c85bdaec-43e5-4507-a615-89549901e392.json b/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Clarus-7B-v0.2/c85bdaec-43e5-4507-a615-89549901e392.json deleted file mode 100644 index c9da529bcb7624853afefd525799d75ff86e050e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Clarus-7B-v0.2/c85bdaec-43e5-4507-a615-89549901e392.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/suayptalha_Clarus-7B-v0.2/1762652580.542793", - "retrieved_timestamp": "1762652580.542794", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "suayptalha/Clarus-7B-v0.2", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/Clarus-7B-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7679423928509688 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5490057426751466 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48564954682779454 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44165625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4399933510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.613 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Clarus-7B-v0.3/21d1f676-4a7d-4305-b248-4a72d7ce0121.json b/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Clarus-7B-v0.3/21d1f676-4a7d-4305-b248-4a72d7ce0121.json deleted file mode 100644 index 1938461c05503f6cf55320b9d3757f5d1e66c9ff..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Clarus-7B-v0.3/21d1f676-4a7d-4305-b248-4a72d7ce0121.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/suayptalha_Clarus-7B-v0.3/1762652580.543006", - "retrieved_timestamp": "1762652580.543007", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "suayptalha/Clarus-7B-v0.3", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/Clarus-7B-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7509064836855099 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5525985716155296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4879154078549849 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44022916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4384973404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Falcon3-Jessi-v0.4-7B-Slerp/9a9cb5f7-e95a-46c5-90ed-42152fc0a617.json b/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Falcon3-Jessi-v0.4-7B-Slerp/9a9cb5f7-e95a-46c5-90ed-42152fc0a617.json deleted file mode 100644 index 32715309352dd3b8233081c6ed47d4f68bb536ef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Falcon3-Jessi-v0.4-7B-Slerp/9a9cb5f7-e95a-46c5-90ed-42152fc0a617.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/suayptalha_Falcon3-Jessi-v0.4-7B-Slerp/1762652580.543463", - "retrieved_timestamp": "1762652580.543463", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "suayptalha/Falcon3-Jessi-v0.4-7B-Slerp", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/Falcon3-Jessi-v0.4-7B-Slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7676176988169169 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5590927389495824 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48121875000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.406000664893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_HomerCreativeAnvita-Mix-Qw7B/7bb9a15a-ece4-4fb7-b0ae-dc8cf69efb6b.json b/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_HomerCreativeAnvita-Mix-Qw7B/7bb9a15a-ece4-4fb7-b0ae-dc8cf69efb6b.json deleted file mode 100644 index 549eeb07925a7c184e407e14a5b3420daa84e085..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_HomerCreativeAnvita-Mix-Qw7B/7bb9a15a-ece4-4fb7-b0ae-dc8cf69efb6b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": 
"0.0.1", - "evaluation_id": "hfopenllm_v2/suayptalha_HomerCreativeAnvita-Mix-Qw7B/1762652580.543669", - "retrieved_timestamp": "1762652580.54367", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "suayptalha/HomerCreativeAnvita-Mix-Qw7B", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/HomerCreativeAnvita-Mix-Qw7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7807816593305763 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5564653181490319 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44159375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4444813829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Lamarckvergence-14B/2c918f65-3565-41f6-a9c2-d042608bc592.json b/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Lamarckvergence-14B/2c918f65-3565-41f6-a9c2-d042608bc592.json deleted file mode 100644 index d5d366a9b055f0bb6c39bcae8b61d19f153d7146..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Lamarckvergence-14B/2c918f65-3565-41f6-a9c2-d042608bc592.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/suayptalha_Lamarckvergence-14B/1762652580.544092", - "retrieved_timestamp": "1762652580.544093", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "suayptalha/Lamarckvergence-14B", - "developer": "suayptalha", 
- "inference_platform": "unknown", - "id": "suayptalha/Lamarckvergence-14B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7655941790006073 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.651698573892736 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5400302114803626 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36325503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44215625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5283410904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Lix-14B-v0.1/f4866eb3-28b0-416b-92c7-764d38905686.json b/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Lix-14B-v0.1/f4866eb3-28b0-416b-92c7-764d38905686.json deleted file mode 100644 index 3d63f2e3f21f4afada9ae66cca8ec2618091a165..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Lix-14B-v0.1/f4866eb3-28b0-416b-92c7-764d38905686.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/suayptalha_Lix-14B-v0.1/1762652580.5443048", - "retrieved_timestamp": "1762652580.5443058", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "suayptalha/Lix-14B-v0.1", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/Lix-14B-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7813313120298586 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6607910825152539 - } - }, - { 
- "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294561933534743 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3699664429530201 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43378125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5314162234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Maestro-10B/b302d40a-64bd-4cdd-b5fb-3a9c1dbf1406.json b/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Maestro-10B/b302d40a-64bd-4cdd-b5fb-3a9c1dbf1406.json deleted file mode 100644 index b280fd7c3b0f1fb797fb1034992328226420f495..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Maestro-10B/b302d40a-64bd-4cdd-b5fb-3a9c1dbf1406.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/suayptalha_Maestro-10B/1762652580.5447612", - "retrieved_timestamp": "1762652580.5447621", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "suayptalha/Maestro-10B", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/Maestro-10B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7767601076255447 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5746090622656775 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19108761329305135 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43972916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42179188829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Rombos-2.5-T.E-8.1/fa7a31f9-9c10-4f5f-a06f-e628363a726a.json b/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Rombos-2.5-T.E-8.1/fa7a31f9-9c10-4f5f-a06f-e628363a726a.json deleted file mode 100644 index 74c83f5390d2dffc5c734001b80b52cfed061464..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/suayptalha/suayptalha_Rombos-2.5-T.E-8.1/fa7a31f9-9c10-4f5f-a06f-e628363a726a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/suayptalha_Rombos-2.5-T.E-8.1/1762652580.544959", - "retrieved_timestamp": "1762652580.544959", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "suayptalha/Rombos-2.5-T.E-8.1", - "developer": "suayptalha", - "inference_platform": "unknown", - "id": "suayptalha/Rombos-2.5-T.E-8.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6925047762159957 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5514641249478369 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49244712990936557 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41663541666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4445644946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/sumink/sumink_Qmerft/11243917-73a3-484e-ac8b-40065c65ea8c.json b/leaderboard_data/HFOpenLLMv2/sumink/sumink_Qmerft/11243917-73a3-484e-ac8b-40065c65ea8c.json deleted file mode 100644 index 8c6d077306c821474915d0be3e1ab1e87969efe0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sumink/sumink_Qmerft/11243917-73a3-484e-ac8b-40065c65ea8c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_Qmerft/1762652580.5451572", - "retrieved_timestamp": "1762652580.5451572", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/Qmerft", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/Qmerft" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15639724819035714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29390930175643937 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0022658610271903325 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36876041666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11569148936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.777 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sumink/sumink_llftfl7/ed7c36f0-5b1a-45ef-be66-f9880cad099d.json b/leaderboard_data/HFOpenLLMv2/sumink/sumink_llftfl7/ed7c36f0-5b1a-45ef-be66-f9880cad099d.json deleted file mode 100644 index b1d386585ae16c65678a14e87faf5ac5a7421c09..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sumink/sumink_llftfl7/ed7c36f0-5b1a-45ef-be66-f9880cad099d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_llftfl7/1762652580.548197", - "retrieved_timestamp": "1762652580.548198", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/llftfl7", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/llftfl7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17143512546709397 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37864273336631166 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.010574018126888218 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36320833333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17428523936170212 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sumink/sumink_llmer/8f2bad2c-5c31-433a-bbf0-f1a8f0a80c3a.json b/leaderboard_data/HFOpenLLMv2/sumink/sumink_llmer/8f2bad2c-5c31-433a-bbf0-f1a8f0a80c3a.json deleted file mode 100644 index 09ee4fe8c6585fe5fa35d7d181855635cd26808e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sumink/sumink_llmer/8f2bad2c-5c31-433a-bbf0-f1a8f0a80c3a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_llmer/1762652580.548394", - "retrieved_timestamp": "1762652580.548395", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/llmer", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/llmer" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3191132860809319 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4884590875207178 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0649546827794562 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4039166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35289228723404253 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sumink/sumink_qwft/6cdf831f-3ccd-4d78-a94f-269ace42fc1c.json b/leaderboard_data/HFOpenLLMv2/sumink/sumink_qwft/6cdf831f-3ccd-4d78-a94f-269ace42fc1c.json deleted file mode 100644 index 8c73496fb22883f3080dbe5438f73565653995f6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sumink/sumink_qwft/6cdf831f-3ccd-4d78-a94f-269ace42fc1c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_qwft/1762652580.548597", - "retrieved_timestamp": "1762652580.548597", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/qwft", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/qwft" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11965252197502627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30021752093452153 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - 
}, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3580625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11294880319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sumink/sumink_qwmer/2cd4d3ec-2800-4223-ab50-6f9f4a1e1a57.json b/leaderboard_data/HFOpenLLMv2/sumink/sumink_qwmer/2cd4d3ec-2800-4223-ab50-6f9f4a1e1a57.json deleted file mode 100644 index 3023c1b95df0794aed1862396b273179b2279dec..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sumink/sumink_qwmer/2cd4d3ec-2800-4223-ab50-6f9f4a1e1a57.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_qwmer/1762652580.54879", - "retrieved_timestamp": "1762652580.548791", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/qwmer", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/qwmer" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22124407682726277 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4298800979582788 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0007552870090634441 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28691275167785235 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4031770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22149268617021275 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/sumink/sumink_solarmer3/59ebeb48-88c4-4c63-92bb-888752ea9dad.json b/leaderboard_data/HFOpenLLMv2/sumink/sumink_solarmer3/59ebeb48-88c4-4c63-92bb-888752ea9dad.json deleted file mode 100644 index 16184f323a711a5d29c8902c5da70adc443e6666..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sumink/sumink_solarmer3/59ebeb48-88c4-4c63-92bb-888752ea9dad.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_solarmer3/1762652580.5489879", - "retrieved_timestamp": "1762652580.5489888", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/solarmer3", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/solarmer3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3741428299135183 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5265990319952963 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0581570996978852 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2911073825503356 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44013541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.332280585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sumink/sumink_somer/282fa475-0ac8-4230-8020-9dbb7fda03da.json b/leaderboard_data/HFOpenLLMv2/sumink/sumink_somer/282fa475-0ac8-4230-8020-9dbb7fda03da.json deleted file mode 100644 index d0e1b6314a125f7829c0675c134bf62f341a2e2a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sumink/sumink_somer/282fa475-0ac8-4230-8020-9dbb7fda03da.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_somer/1762652580.549191", - "retrieved_timestamp": "1762652580.549192", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/somer", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/somer" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29902990731259727 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.519370328606347 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04154078549848943 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.465 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447473404255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sumink/sumink_somer2/fee6fbc3-c115-4668-8b5b-35b307c15fe8.json b/leaderboard_data/HFOpenLLMv2/sumink/sumink_somer2/fee6fbc3-c115-4668-8b5b-35b307c15fe8.json deleted file mode 100644 index 7b653392669d03cf903e76453c519a46ee589eda..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sumink/sumink_somer2/fee6fbc3-c115-4668-8b5b-35b307c15fe8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_somer2/1762652580.549396", - "retrieved_timestamp": "1762652580.549397", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/somer2", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/somer2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3132433055404106 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5166793474130525 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04682779456193353 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46630208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34325132978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sumink/sumink_somerft/cb6879a2-41b6-40b6-bb20-723aa0b213e1.json b/leaderboard_data/HFOpenLLMv2/sumink/sumink_somerft/cb6879a2-41b6-40b6-bb20-723aa0b213e1.json deleted file mode 100644 index 2fe017892da7eb599537ac3e176d6cc472ef247c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sumink/sumink_somerft/cb6879a2-41b6-40b6-bb20-723aa0b213e1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sumink_somerft/1762652580.5496058", - "retrieved_timestamp": "1762652580.5496068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sumink/somerft", - "developer": "sumink", - "inference_platform": "unknown", - "id": "sumink/somerft" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14305819669587805 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3093455213252133 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.2483221476510067 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40447916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11170212765957446 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.543 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/sunbaby/sunbaby_BrainCog-8B-0.1-Instruct/96412e92-8a74-429b-8014-30a526521356.json b/leaderboard_data/HFOpenLLMv2/sunbaby/sunbaby_BrainCog-8B-0.1-Instruct/96412e92-8a74-429b-8014-30a526521356.json deleted file mode 100644 index d28e6637f64b3a452bea03d03332c37362efb7bd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/sunbaby/sunbaby_BrainCog-8B-0.1-Instruct/96412e92-8a74-429b-8014-30a526521356.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/sunbaby_BrainCog-8B-0.1-Instruct/1762652580.549814", - "retrieved_timestamp": "1762652580.549815", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "sunbaby/BrainCog-8B-0.1-Instruct", - "developer": "sunbaby", - "inference_platform": "unknown", - "id": "sunbaby/BrainCog-8B-0.1-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4253004250943053 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46182179983247446 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011744966442953 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36559375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28582114361702127 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/swap-uniba/swap-uniba_LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/f2475574-fc9d-4cd1-94fb-ddd8bb89fa95.json b/leaderboard_data/HFOpenLLMv2/swap-uniba/swap-uniba_LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/f2475574-fc9d-4cd1-94fb-ddd8bb89fa95.json deleted file mode 100644 index aa37fff4de71b4292252696b2523dd3b535dde1f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/swap-uniba/swap-uniba_LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/f2475574-fc9d-4cd1-94fb-ddd8bb89fa95.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/swap-uniba_LLaMAntino-3-ANITA-8B-Inst-DPO-ITA/1762652580.550269", - "retrieved_timestamp": "1762652580.5502698", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA", - "developer": "swap-uniba", - "inference_platform": "unknown", - "id": "swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4815046299374548 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4935698792285044 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43873958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723404255319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/talha2001/talha2001_Beast-Soul-new/01f536ff-7613-4b09-b793-1f51bf32f705.json b/leaderboard_data/HFOpenLLMv2/talha2001/talha2001_Beast-Soul-new/01f536ff-7613-4b09-b793-1f51bf32f705.json deleted file mode 100644 index 68b7b16996810cf431781f2a30f34809634c6e3f..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/talha2001/talha2001_Beast-Soul-new/01f536ff-7613-4b09-b793-1f51bf32f705.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/talha2001_Beast-Soul-new/1762652580.5509062", - "retrieved_timestamp": "1762652580.5509079", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "talha2001/Beast-Soul-new", - "developer": "talha2001", - "inference_platform": "unknown", - "id": "talha2001/Beast-Soul-new" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4853510906616666 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5227143628884523 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4459270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3101728723404255 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tangledgroup/tangledgroup_tangled-llama-pints-1.5b-v0.1-instruct/727047f6-974d-4980-a8cd-672728885485.json b/leaderboard_data/HFOpenLLMv2/tangledgroup/tangledgroup_tangled-llama-pints-1.5b-v0.1-instruct/727047f6-974d-4980-a8cd-672728885485.json deleted file mode 100644 index 16378c159b92a8a3fee54d4f23c15e3c49c19f63..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tangledgroup/tangledgroup_tangled-llama-pints-1.5b-v0.1-instruct/727047f6-974d-4980-a8cd-672728885485.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tangledgroup_tangled-llama-pints-1.5b-v0.1-instruct/1762652580.5513222", - "retrieved_timestamp": "1762652580.5513222", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": 
"leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct", - "developer": "tangledgroup", - "inference_platform": "unknown", - "id": "tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15090182936829835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31434444692284963 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23993288590604026 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37613541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11087101063829788 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.5 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tangledgroup/tangledgroup_tangled-llama-pints-1.5b-v0.2-instruct/3964e579-bb1f-46be-8740-ba8097d8f7ef.json b/leaderboard_data/HFOpenLLMv2/tangledgroup/tangledgroup_tangled-llama-pints-1.5b-v0.2-instruct/3964e579-bb1f-46be-8740-ba8097d8f7ef.json deleted file mode 100644 index bf327616537466d9aaaf0165b2e1c21eff63f1a8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tangledgroup/tangledgroup_tangled-llama-pints-1.5b-v0.2-instruct/3964e579-bb1f-46be-8740-ba8097d8f7ef.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tangledgroup_tangled-llama-pints-1.5b-v0.2-instruct/1762652580.551594", - "retrieved_timestamp": "1762652580.551595", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct", - "developer": "tangledgroup", - "inference_platform": "unknown", - "id": "tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1724092075692496 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3158349391752727 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3642916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11170212765957446 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.5 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tanliboy/tanliboy_lambda-gemma-2-9b-dpo/6dd14f37-6493-4f9d-a5a8-6ad62aa4ca04.json b/leaderboard_data/HFOpenLLMv2/tanliboy/tanliboy_lambda-gemma-2-9b-dpo/6dd14f37-6493-4f9d-a5a8-6ad62aa4ca04.json deleted file mode 100644 index fdc29a139ffca5584b456391e16c194813a5f1c4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tanliboy/tanliboy_lambda-gemma-2-9b-dpo/6dd14f37-6493-4f9d-a5a8-6ad62aa4ca04.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tanliboy_lambda-gemma-2-9b-dpo/1762652580.551808", - "retrieved_timestamp": "1762652580.551809", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tanliboy/lambda-gemma-2-9b-dpo", - "developer": "tanliboy", - "inference_platform": "unknown", - "id": "tanliboy/lambda-gemma-2-9b-dpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45008023156336296 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.547172399190412 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09441087613293052 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40165625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.379155585106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tanliboy/tanliboy_lambda-gemma-2-9b-dpo/fe623f86-5397-4818-aa3f-75c2f6632bec.json b/leaderboard_data/HFOpenLLMv2/tanliboy/tanliboy_lambda-gemma-2-9b-dpo/fe623f86-5397-4818-aa3f-75c2f6632bec.json deleted file mode 100644 index ec92936716783e7f03a10b70c38b3b348fdfbbc6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tanliboy/tanliboy_lambda-gemma-2-9b-dpo/fe623f86-5397-4818-aa3f-75c2f6632bec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tanliboy_lambda-gemma-2-9b-dpo/1762652580.5520582", - "retrieved_timestamp": "1762652580.5520582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tanliboy/lambda-gemma-2-9b-dpo", - "developer": "tanliboy", - "inference_platform": "unknown", - "id": "tanliboy/lambda-gemma-2-9b-dpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18292463995531855 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5487911206515993 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.40562499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3804853723404255 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 9.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tanliboy/tanliboy_lambda-qwen2.5-14b-dpo-test/04686df9-9ef7-4df9-bb1e-a4c113a6e32e.json b/leaderboard_data/HFOpenLLMv2/tanliboy/tanliboy_lambda-qwen2.5-14b-dpo-test/04686df9-9ef7-4df9-bb1e-a4c113a6e32e.json deleted file mode 100644 index 7c9390e7e71f2312ab17e7d4521736c9417e5b9d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tanliboy/tanliboy_lambda-qwen2.5-14b-dpo-test/04686df9-9ef7-4df9-bb1e-a4c113a6e32e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tanliboy_lambda-qwen2.5-14b-dpo-test/1762652580.5523891", - "retrieved_timestamp": "1762652580.5523908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tanliboy/lambda-qwen2.5-14b-dpo-test", - "developer": "tanliboy", - "inference_platform": "unknown", - "id": "tanliboy/lambda-qwen2.5-14b-dpo-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8231215397367873 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6393505282981286 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5460725075528701 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624161073825503 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42603125000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4847905585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/tanliboy/tanliboy_lambda-qwen2.5-32b-dpo-test/87569202-e422-423b-a2a6-96f94dbaf99c.json b/leaderboard_data/HFOpenLLMv2/tanliboy/tanliboy_lambda-qwen2.5-32b-dpo-test/87569202-e422-423b-a2a6-96f94dbaf99c.json deleted file mode 100644 index 06e55aede6ba4980a1a03aa0458bf7a5346f0a42..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tanliboy/tanliboy_lambda-qwen2.5-32b-dpo-test/87569202-e422-423b-a2a6-96f94dbaf99c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tanliboy_lambda-qwen2.5-32b-dpo-test/1762652580.552684", - "retrieved_timestamp": "1762652580.552685", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tanliboy/lambda-qwen2.5-32b-dpo-test", - "developer": "tanliboy", - "inference_platform": "unknown", - "id": "tanliboy/lambda-qwen2.5-32b-dpo-test" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8083839767372794 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6763904009446838 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6102719033232629 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3565436241610738 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42742708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.565658244680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tannedbum/tannedbum_Ellaria-9B/ca946b2a-4345-42b9-aefd-0907b91759d7.json b/leaderboard_data/HFOpenLLMv2/tannedbum/tannedbum_Ellaria-9B/ca946b2a-4345-42b9-aefd-0907b91759d7.json deleted file mode 100644 index a89d998e4443f7021d9b7e30a3ed109fff44da0e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tannedbum/tannedbum_Ellaria-9B/ca946b2a-4345-42b9-aefd-0907b91759d7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/tannedbum_Ellaria-9B/1762652580.5529752", - "retrieved_timestamp": "1762652580.552976", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tannedbum/Ellaria-9B", - "developer": "tannedbum", - "inference_platform": "unknown", - "id": "tannedbum/Ellaria-9B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7825802204816554 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5942102115140485 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20770392749244712 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33305369127516776 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4151458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42054521276595747 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 10.159 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tannedbum/tannedbum_L3-Nymeria-Maid-8B/3b1941a4-b8ca-49f4-9c09-18beb1b470e4.json b/leaderboard_data/HFOpenLLMv2/tannedbum/tannedbum_L3-Nymeria-Maid-8B/3b1941a4-b8ca-49f4-9c09-18beb1b470e4.json deleted file mode 100644 index 99d6d94701feda5db57f523d3afcd544b0b6c646..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tannedbum/tannedbum_L3-Nymeria-Maid-8B/3b1941a4-b8ca-49f4-9c09-18beb1b470e4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tannedbum_L3-Nymeria-Maid-8B/1762652580.553287", - "retrieved_timestamp": "1762652580.553288", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tannedbum/L3-Nymeria-Maid-8B", - "developer": "tannedbum", - "inference_platform": "unknown", - "id": "tannedbum/L3-Nymeria-Maid-8B" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7250029920610646 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5146055785516804 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37505208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37466755319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tannedbum/tannedbum_L3-Nymeria-v2-8B/61d5c969-6aff-49b7-8fa3-bcf0ff0b661d.json b/leaderboard_data/HFOpenLLMv2/tannedbum/tannedbum_L3-Nymeria-v2-8B/61d5c969-6aff-49b7-8fa3-bcf0ff0b661d.json deleted file mode 100644 index 1fe7444e9bd56d11fd63edef5ff9e796b6689c47..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tannedbum/tannedbum_L3-Nymeria-v2-8B/61d5c969-6aff-49b7-8fa3-bcf0ff0b661d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tannedbum_L3-Nymeria-v2-8B/1762652580.553518", - "retrieved_timestamp": "1762652580.553519", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tannedbum/L3-Nymeria-v2-8B", - "developer": "tannedbum", - "inference_platform": "unknown", - "id": "tannedbum/L3-Nymeria-v2-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7168346653545925 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5224198261531375 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902684563758389 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.369875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37533244680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tannedbum/tannedbum_L3-Rhaenys-8B/c44ac25e-9139-477d-abcd-442b3a0dc2cf.json b/leaderboard_data/HFOpenLLMv2/tannedbum/tannedbum_L3-Rhaenys-8B/c44ac25e-9139-477d-abcd-442b3a0dc2cf.json deleted file mode 100644 index 53825484b2a689e01a0ac5124ccc607d0bdde670..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tannedbum/tannedbum_L3-Rhaenys-8B/c44ac25e-9139-477d-abcd-442b3a0dc2cf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tannedbum_L3-Rhaenys-8B/1762652580.553731", - "retrieved_timestamp": "1762652580.5537322", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tannedbum/L3-Rhaenys-8B", - "developer": "tannedbum", - "inference_platform": "unknown", - "id": "tannedbum/L3-Rhaenys-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7362686560548235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5299209893116719 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2978187919463087 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 
0, - "max_score": 1 - }, - "score_details": { - "score": 0.3724791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3799035904255319 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/teknium/teknium_OpenHermes-13B/55d876b7-159e-4c76-848b-1480b4c2f4a2.json b/leaderboard_data/HFOpenLLMv2/teknium/teknium_OpenHermes-13B/55d876b7-159e-4c76-848b-1480b4c2f4a2.json deleted file mode 100644 index 91252ed592ea0f9bdc63c3e08db7b68998f2ce8b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/teknium/teknium_OpenHermes-13B/55d876b7-159e-4c76-848b-1480b4c2f4a2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-13B/1762652580.5542011", - "retrieved_timestamp": "1762652580.554202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "teknium/OpenHermes-13B", - "developer": "teknium", - "inference_platform": "unknown", - "id": "teknium/OpenHermes-13B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2668065178171696 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42064384521911524 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4042604166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23894614361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/teknium/teknium_OpenHermes-7B/089f10dc-8be6-4595-a0b3-7d5bb4fc13fa.json 
b/leaderboard_data/HFOpenLLMv2/teknium/teknium_OpenHermes-7B/089f10dc-8be6-4595-a0b3-7d5bb4fc13fa.json deleted file mode 100644 index 2793743964318bf5f0b0c41877d43755406ab1b1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/teknium/teknium_OpenHermes-7B/089f10dc-8be6-4595-a0b3-7d5bb4fc13fa.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/teknium_OpenHermes-7B/1762652580.5548952", - "retrieved_timestamp": "1762652580.5548952", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "teknium/OpenHermes-7B", - "developer": "teknium", - "inference_platform": "unknown", - "id": "teknium/OpenHermes-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1812513021006485 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.362033648602934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4323854166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19331781914893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_falcon3-10b-tensopolis-v1/d59c7d7c-99a9-4de5-9a69-60b934eafa1b.json b/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_falcon3-10b-tensopolis-v1/d59c7d7c-99a9-4de5-9a69-60b934eafa1b.json deleted file mode 100644 index d44135c58acb33c71723e10967f40c149acc27b9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_falcon3-10b-tensopolis-v1/d59c7d7c-99a9-4de5-9a69-60b934eafa1b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tensopolis_falcon3-10b-tensopolis-v1/1762652580.555104", - "retrieved_timestamp": "1762652580.555105", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tensopolis/falcon3-10b-tensopolis-v1", - "developer": "tensopolis", - "inference_platform": "unknown", - "id": "tensopolis/falcon3-10b-tensopolis-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7816560060639104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.618226655000786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27492447129909364 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3296979865771812 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43753125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4419880319148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_falcon3-10b-tensopolis-v2/ce5dfe15-432b-42ac-9ef1-569ab4e640a6.json b/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_falcon3-10b-tensopolis-v2/ce5dfe15-432b-42ac-9ef1-569ab4e640a6.json deleted file mode 100644 index 2f009851b8fa5d3a2c679c2f56d1ed6c890d6d96..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_falcon3-10b-tensopolis-v2/ce5dfe15-432b-42ac-9ef1-569ab4e640a6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tensopolis_falcon3-10b-tensopolis-v2/1762652580.555352", - "retrieved_timestamp": "1762652580.5553532", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tensopolis/falcon3-10b-tensopolis-v2", - "developer": "tensopolis", - "inference_platform": "unknown", - "id": "tensopolis/falcon3-10b-tensopolis-v2" - }, - "evaluation_results": [ - { - "evaluation_name": 
"IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7792080568447275 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.618226655000786 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26661631419939574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3271812080536913 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4296875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4424035904255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_lamarckvergence-14b-tensopolis-v1/da94039c-b214-4ad0-a312-a38cea28498b.json b/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_lamarckvergence-14b-tensopolis-v1/da94039c-b214-4ad0-a312-a38cea28498b.json deleted file mode 100644 index b215de49c4e127d184f4f8727e1cd323c3a4a49d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_lamarckvergence-14b-tensopolis-v1/da94039c-b214-4ad0-a312-a38cea28498b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tensopolis_lamarckvergence-14b-tensopolis-v1/1762652580.555553", - "retrieved_timestamp": "1762652580.5555542", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tensopolis/lamarckvergence-14b-tensopolis-v1", - "developer": "tensopolis", - "inference_platform": "unknown", - "id": "tensopolis/lamarckvergence-14b-tensopolis-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7603735865281896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6561154329558933 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5166163141993958 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36073825503355705 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44745833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5250166223404256 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_virtuoso-lite-tensopolis-v1/574e1e63-46f3-44a4-8d04-ad1709a7e1dd.json b/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_virtuoso-lite-tensopolis-v1/574e1e63-46f3-44a4-8d04-ad1709a7e1dd.json deleted file mode 100644 index 8d4d4e874b1529c676b7b3533d28cd0c38fa1c19..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_virtuoso-lite-tensopolis-v1/574e1e63-46f3-44a4-8d04-ad1709a7e1dd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-lite-tensopolis-v1/1762652580.557624", - "retrieved_timestamp": "1762652580.557625", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tensopolis/virtuoso-lite-tensopolis-v1", - "developer": "tensopolis", - "inference_platform": "unknown", - "id": "tensopolis/virtuoso-lite-tensopolis-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.806910109620252 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.610185430846048 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2545317220543807 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447986577181208 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4582395833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4434840425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_virtuoso-lite-tensopolis-v2/9024dcc9-fbd0-4ab0-9142-cbf741e7ae54.json b/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_virtuoso-lite-tensopolis-v2/9024dcc9-fbd0-4ab0-9142-cbf741e7ae54.json deleted file mode 100644 index 7e0e25500cf74e931df84123049a7ecc63cef389..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_virtuoso-lite-tensopolis-v2/9024dcc9-fbd0-4ab0-9142-cbf741e7ae54.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-lite-tensopolis-v2/1762652580.5578399", - "retrieved_timestamp": "1762652580.5578408", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tensopolis/virtuoso-lite-tensopolis-v2", - "developer": "tensopolis", - "inference_platform": "unknown", - "id": "tensopolis/virtuoso-lite-tensopolis-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8029384255996312 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6100187641793813 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4595416666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44398271276595747 - } - } - ], - "additional_details": { - 
"precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_virtuoso-small-tensopolis-v1/2228ade6-6243-423f-857e-66f5584a1511.json b/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_virtuoso-small-tensopolis-v1/2228ade6-6243-423f-857e-66f5584a1511.json deleted file mode 100644 index 27e63593cef7c4b45969895d7971bce3659fca21..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_virtuoso-small-tensopolis-v1/2228ade6-6243-423f-857e-66f5584a1511.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-small-tensopolis-v1/1762652580.5582058", - "retrieved_timestamp": "1762652580.558207", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tensopolis/virtuoso-small-tensopolis-v1", - "developer": "tensopolis", - "inference_platform": "unknown", - "id": "tensopolis/virtuoso-small-tensopolis-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7856276900845313 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6415395136436205 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527190332326284 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32802013422818793 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43263541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4968417553191489 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_virtuoso-small-tensopolis-v2/c5c34d42-c043-4d60-80bf-5cb522e9d915.json b/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_virtuoso-small-tensopolis-v2/c5c34d42-c043-4d60-80bf-5cb522e9d915.json deleted file mode 100644 index 0e73ac35f2dda4cef4783eff721d2f79e84e2d90..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_virtuoso-small-tensopolis-v2/c5c34d42-c043-4d60-80bf-5cb522e9d915.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-small-tensopolis-v2/1762652580.5584881", - "retrieved_timestamp": "1762652580.558489", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tensopolis/virtuoso-small-tensopolis-v2", - "developer": "tensopolis", - "inference_platform": "unknown", - "id": "tensopolis/virtuoso-small-tensopolis-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8020142111818863 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6515835977499008 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38746223564954685 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43523958333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.515375664893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_virtuoso-small-v2-tensopolis-v1/727869c4-3498-482a-a04e-c6a779c0e558.json b/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_virtuoso-small-v2-tensopolis-v1/727869c4-3498-482a-a04e-c6a779c0e558.json deleted file mode 100644 index 04eb8e0c89303c4ad6929dd91e196e05b4bcab71..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tensopolis/tensopolis_virtuoso-small-v2-tensopolis-v1/727869c4-3498-482a-a04e-c6a779c0e558.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tensopolis_virtuoso-small-v2-tensopolis-v1/1762652580.558718", - "retrieved_timestamp": "1762652580.558719", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tensopolis/virtuoso-small-v2-tensopolis-v1", - "developer": "tensopolis", - "inference_platform": "unknown", - "id": "tensopolis/virtuoso-small-v2-tensopolis-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8419061423689145 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6544753426578069 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.452416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45092708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5175365691489362 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tensoropera/tensoropera_Fox-1-1.6B/998d2bbc-2722-4fb8-9a6a-230c146e2e37.json b/leaderboard_data/HFOpenLLMv2/tensoropera/tensoropera_Fox-1-1.6B/998d2bbc-2722-4fb8-9a6a-230c146e2e37.json deleted file mode 100644 index 833fd69ab52bb06fc1c6fdabaa009ec802683d02..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tensoropera/tensoropera_Fox-1-1.6B/998d2bbc-2722-4fb8-9a6a-230c146e2e37.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tensoropera_Fox-1-1.6B/1762652580.558935", - "retrieved_timestamp": "1762652580.558936", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tensoropera/Fox-1-1.6B", - "developer": "tensoropera", - "inference_platform": "unknown", - "id": "tensoropera/Fox-1-1.6B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27659831469390106 
- } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3307369914593792 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35498958333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1371343085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.665 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/theo77186/theo77186_Qwen2.5-Coder-7B-Instruct-20241106/b8198c8b-533a-4f7c-9025-1ccd7a4aba76.json b/leaderboard_data/HFOpenLLMv2/theo77186/theo77186_Qwen2.5-Coder-7B-Instruct-20241106/b8198c8b-533a-4f7c-9025-1ccd7a4aba76.json deleted file mode 100644 index 3d0d365194704d35643517701fe036b528fab504..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/theo77186/theo77186_Qwen2.5-Coder-7B-Instruct-20241106/b8198c8b-533a-4f7c-9025-1ccd7a4aba76.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theo77186_Qwen2.5-Coder-7B-Instruct-20241106/1762652580.559671", - "retrieved_timestamp": "1762652580.559671", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theo77186/Qwen2.5-Coder-7B-Instruct-20241106", - "developer": "theo77186", - "inference_platform": "unknown", - "id": "theo77186/Qwen2.5-Coder-7B-Instruct-20241106" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6101477413263474 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5007976986224548 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.38821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29194630872483224 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4072708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33527260638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/theprint/theprint_Boptruth-Agatha-7B/0d1c0e64-8a5a-4797-9234-91a4f1726171.json b/leaderboard_data/HFOpenLLMv2/theprint/theprint_Boptruth-Agatha-7B/0d1c0e64-8a5a-4797-9234-91a4f1726171.json deleted file mode 100644 index 9811acb30f82aa9940a9da9fdfda1203feaedb54..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/theprint/theprint_Boptruth-Agatha-7B/0d1c0e64-8a5a-4797-9234-91a4f1726171.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_Boptruth-Agatha-7B/1762652580.559956", - "retrieved_timestamp": "1762652580.559957", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/Boptruth-Agatha-7B", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/Boptruth-Agatha-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.312418826491487 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4983936045348778 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42766666666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - 
"evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28607047872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/theprint/theprint_CleverBoi-7B-v2/0ef8de5e-4e2f-4d74-9267-e953375dbdf4.json b/leaderboard_data/HFOpenLLMv2/theprint/theprint_CleverBoi-7B-v2/0ef8de5e-4e2f-4d74-9267-e953375dbdf4.json deleted file mode 100644 index 9d5f07784a8c8d084be3d18724feefb8b58039e4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/theprint/theprint_CleverBoi-7B-v2/0ef8de5e-4e2f-4d74-9267-e953375dbdf4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-7B-v2/1762652580.56022", - "retrieved_timestamp": "1762652580.560221", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/CleverBoi-7B-v2", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/CleverBoi-7B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21699756645700075 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45317253321634526 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46953125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27086103723404253 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 7.736 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/theprint/theprint_CleverBoi-7B-v3/4634b7d7-110e-422c-af60-80cd9df06dac.json b/leaderboard_data/HFOpenLLMv2/theprint/theprint_CleverBoi-7B-v3/4634b7d7-110e-422c-af60-80cd9df06dac.json deleted file mode 100644 index 
e3e4f6359412002544c3fc3024c11339f85f1958..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/theprint/theprint_CleverBoi-7B-v3/4634b7d7-110e-422c-af60-80cd9df06dac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-7B-v3/1762652580.560437", - "retrieved_timestamp": "1762652580.560438", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/CleverBoi-7B-v3", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/CleverBoi-7B-v3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23823011830831084 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4414430902840938 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26593959731543626 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4071770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28681848404255317 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 7.736 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/theprint/theprint_CleverBoi-Llama-3.1-8B-Instruct/86d3bb20-09a5-4ec0-a473-14a3e3c5a402.json b/leaderboard_data/HFOpenLLMv2/theprint/theprint_CleverBoi-Llama-3.1-8B-Instruct/86d3bb20-09a5-4ec0-a473-14a3e3c5a402.json deleted file mode 100644 index 371434d407bf40b14a385d5ceb2fb5fa088b72ef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/theprint/theprint_CleverBoi-Llama-3.1-8B-Instruct/86d3bb20-09a5-4ec0-a473-14a3e3c5a402.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-Llama-3.1-8B-Instruct/1762652580.5606558", - "retrieved_timestamp": "1762652580.5606568", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/CleverBoi-Llama-3.1-8B-Instruct", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/CleverBoi-Llama-3.1-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16816269719898758 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4559618469185147 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40143750000000006 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30751329787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 16.061 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/theprint/theprint_CleverBoi-Nemo-12B-v2/3ac95acf-830a-48ca-a144-42b610558062.json b/leaderboard_data/HFOpenLLMv2/theprint/theprint_CleverBoi-Nemo-12B-v2/3ac95acf-830a-48ca-a144-42b610558062.json deleted file mode 100644 index f81479b7f3a7caf3697cc858ee8777ecd985267e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/theprint/theprint_CleverBoi-Nemo-12B-v2/3ac95acf-830a-48ca-a144-42b610558062.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_CleverBoi-Nemo-12B-v2/1762652580.561142", - "retrieved_timestamp": "1762652580.561143", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/CleverBoi-Nemo-12B-v2", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/CleverBoi-Nemo-12B-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.2045827293802666 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241085887165254 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4186770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3228058510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 13.933 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/theprint/theprint_ReWiz-7B/b6f50cef-72b3-414c-a33a-a2c8b2af18c0.json b/leaderboard_data/HFOpenLLMv2/theprint/theprint_ReWiz-7B/b6f50cef-72b3-414c-a33a-a2c8b2af18c0.json deleted file mode 100644 index f37bcfb8a70a1b34bca615ce60d46a652210f6cb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/theprint/theprint_ReWiz-7B/b6f50cef-72b3-414c-a33a-a2c8b2af18c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_ReWiz-7B/1762652580.562494", - "retrieved_timestamp": "1762652580.562496", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/ReWiz-7B", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/ReWiz-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40479261692309737 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4564215411912313 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04078549848942598 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": 
false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46115625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2670378989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 7.736 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/theprint/theprint_ReWiz-Nemo-12B-Instruct/92999dc0-7075-44ee-be68-1ec32ab5645d.json b/leaderboard_data/HFOpenLLMv2/theprint/theprint_ReWiz-Nemo-12B-Instruct/92999dc0-7075-44ee-be68-1ec32ab5645d.json deleted file mode 100644 index eb58686351609c0f4709a2974defc15cc75d821a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/theprint/theprint_ReWiz-Nemo-12B-Instruct/92999dc0-7075-44ee-be68-1ec32ab5645d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Nemo-12B-Instruct/1762652580.563264", - "retrieved_timestamp": "1762652580.563264", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/ReWiz-Nemo-12B-Instruct", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/ReWiz-Nemo-12B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10623811486854878 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5092407647626753 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3238255033557047 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4095625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.33394281914893614 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 12.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/theprint/theprint_ReWiz-Worldbuilder-7B/cf71c265-ef73-4410-a2bc-ce9702cfbcee.json b/leaderboard_data/HFOpenLLMv2/theprint/theprint_ReWiz-Worldbuilder-7B/cf71c265-ef73-4410-a2bc-ce9702cfbcee.json deleted file mode 100644 index 7b88710d66bf208f5f413c9b343a26dd3ae05219..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/theprint/theprint_ReWiz-Worldbuilder-7B/cf71c265-ef73-4410-a2bc-ce9702cfbcee.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Worldbuilder-7B/1762652580.563769", - "retrieved_timestamp": "1762652580.56377", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/ReWiz-Worldbuilder-7B", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/ReWiz-Worldbuilder-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25101951710350756 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46361558385510165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03700906344410876 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45725 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.297124335106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.248 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/theprint/theprint_RuDolph-Hermes-7B/22bab713-09d7-471a-b077-cb8c336ba151.json b/leaderboard_data/HFOpenLLMv2/theprint/theprint_RuDolph-Hermes-7B/22bab713-09d7-471a-b077-cb8c336ba151.json deleted file mode 100644 index f170f1f913c0adf91077401f32e0fa9a5ba75cd0..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/theprint/theprint_RuDolph-Hermes-7B/22bab713-09d7-471a-b077-cb8c336ba151.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_RuDolph-Hermes-7B/1762652580.564037", - "retrieved_timestamp": "1762652580.5640378", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "theprint/RuDolph-Hermes-7B", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/RuDolph-Hermes-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3604292167005767 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5052928613425586 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0513595166163142 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31208053691275167 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4226145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30726396276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/theprint/theprint_WorldBuilder-12B/f1107803-5a3b-4fcc-b948-ff622b5f26da.json b/leaderboard_data/HFOpenLLMv2/theprint/theprint_WorldBuilder-12B/f1107803-5a3b-4fcc-b948-ff622b5f26da.json deleted file mode 100644 index 0272256f25cf8e76ec2a78a134d26e7c44dc0645..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/theprint/theprint_WorldBuilder-12B/f1107803-5a3b-4fcc-b948-ff622b5f26da.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/theprint_WorldBuilder-12B/1762652580.564255", - "retrieved_timestamp": "1762652580.564256", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "theprint/WorldBuilder-12B", - "developer": "theprint", - "inference_platform": "unknown", - "id": "theprint/WorldBuilder-12B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13743755457741016 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5010100641541125 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4066458333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31923204787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "?", - "params_billions": 13.933 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/thinkcoder/thinkcoder_llama3-8b-instruct-lora-8-sft/51caac64-fee1-4c7f-b474-1b1e0f71212c.json b/leaderboard_data/HFOpenLLMv2/thinkcoder/thinkcoder_llama3-8b-instruct-lora-8-sft/51caac64-fee1-4c7f-b474-1b1e0f71212c.json deleted file mode 100644 index 5a70acb41abf7f712b0f5573fb4ab76915c1c1a4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/thinkcoder/thinkcoder_llama3-8b-instruct-lora-8-sft/51caac64-fee1-4c7f-b474-1b1e0f71212c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/thinkcoder_llama3-8b-instruct-lora-8-sft/1762652580.564969", - "retrieved_timestamp": "1762652580.56497", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "thinkcoder/llama3-8b-instruct-lora-8-sft", - "developer": "thinkcoder", - "inference_platform": "unknown", - "id": "thinkcoder/llama3-8b-instruct-lora-8-sft" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6480416406246536 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - 
"evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4865011845587858 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10196374622356495 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32345833333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34757313829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/thirdeyeai/thirdeyeai_elevate360m/013a9bf9-7b9e-4084-b7a2-bb77ad0c18e1.json b/leaderboard_data/HFOpenLLMv2/thirdeyeai/thirdeyeai_elevate360m/013a9bf9-7b9e-4084-b7a2-bb77ad0c18e1.json deleted file mode 100644 index 02c234e98f5431e157a9820e97c12637675e2d54..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/thirdeyeai/thirdeyeai_elevate360m/013a9bf9-7b9e-4084-b7a2-bb77ad0c18e1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/thirdeyeai_elevate360m/1762652580.565248", - "retrieved_timestamp": "1762652580.565249", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "thirdeyeai/elevate360m", - "developer": "thirdeyeai", - "inference_platform": "unknown", - "id": "thirdeyeai/elevate360m" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04448862351892978 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2962583602962783 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2407718120805369 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34621875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1077127659574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-10B-Base/4e1ce0d3-f454-480b-a4f7-7aa827eaaf1a.json b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-10B-Base/4e1ce0d3-f454-480b-a4f7-7aa827eaaf1a.json deleted file mode 100644 index dfc311a42f6b6cf6ac92e86a2745095d74c08026..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-10B-Base/4e1ce0d3-f454-480b-a4f7-7aa827eaaf1a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-10B-Base/1762652580.566659", - "retrieved_timestamp": "1762652580.566659", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/Falcon3-10B-Base", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-10B-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3647754624396601 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.595004253437141 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24924471299093656 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34563758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43979166666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4240359042553192 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-10B-Instruct/741838df-e2a3-4c54-84d3-fe491444071b.json b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-10B-Instruct/741838df-e2a3-4c54-84d3-fe491444071b.json deleted file mode 100644 index 047aa5bfc0f580d20771b632fd9f18266c6e0aa3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-10B-Instruct/741838df-e2a3-4c54-84d3-fe491444071b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-10B-Instruct/1762652580.566902", - "retrieved_timestamp": "1762652580.566903", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/Falcon3-10B-Instruct", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-10B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7816560060639104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6170469398052084 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2764350453172205 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3288590604026846 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43232291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44290226063829785 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.306 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-1B-Base/1e11a625-87e1-49d0-94a6-8f9ec1f75fc3.json b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-1B-Base/1e11a625-87e1-49d0-94a6-8f9ec1f75fc3.json deleted file mode 100644 index f801d95bed7700427d60a208394b57aa38243696..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-1B-Base/1e11a625-87e1-49d0-94a6-8f9ec1f75fc3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-1B-Base/1762652580.567122", - "retrieved_timestamp": "1762652580.567122", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/Falcon3-1B-Base", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-1B-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24280132271262472 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3571153918015637 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03323262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41473958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16082114361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.669 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-1B-Instruct/a060e2b0-d1ae-48b7-b8f9-c51fadc3e152.json b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-1B-Instruct/a060e2b0-d1ae-48b7-b8f9-c51fadc3e152.json deleted file mode 100644 index 62f2233886463d7dc483cdc1ba47df2dcfccc027..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-1B-Instruct/a060e2b0-d1ae-48b7-b8f9-c51fadc3e152.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-1B-Instruct/1762652580.567335", - "retrieved_timestamp": "1762652580.567335", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/Falcon3-1B-Instruct", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": 
"tiiuae/Falcon3-1B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5556678501930433 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3744535691366672 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0634441087613293 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4188958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18384308510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.669 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-3B-Base/1b0d1ae7-322b-46d2-bc33-160f578499b1.json b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-3B-Base/1b0d1ae7-322b-46d2-bc33-160f578499b1.json deleted file mode 100644 index 355f6c2b24f43130e512a9ed592901789f45783b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-3B-Base/1b0d1ae7-322b-46d2-bc33-160f578499b1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-3B-Base/1762652580.5675461", - "retrieved_timestamp": "1762652580.5675468", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/Falcon3-3B-Base", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-3B-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2764985793250797 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4421367825874385 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11782477341389729 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29697986577181207 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3749895833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2878989361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.228 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-3B-Instruct/7aa3aa0e-3b5e-4c0c-a697-2e87859c44f2.json b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-3B-Instruct/7aa3aa0e-3b5e-4c0c-a697-2e87859c44f2.json deleted file mode 100644 index 1dc16793d589273b0ea7e8d54a6cd404b31f9e1e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-3B-Instruct/7aa3aa0e-3b5e-4c0c-a697-2e87859c44f2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-3B-Instruct/1762652580.567748", - "retrieved_timestamp": "1762652580.567749", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/Falcon3-3B-Instruct", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-3B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6976755010040027 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4754430332167569 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28859060402684567 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41359375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.300531914893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.228 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-7B-Base/2420519c-81f1-43b3-9b76-af141d2574f4.json b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-7B-Base/2420519c-81f1-43b3-9b76-af141d2574f4.json deleted file mode 100644 index fb8eeded3200b8e15197dc71901b19d6308f7f4a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-7B-Base/2420519c-81f1-43b3-9b76-af141d2574f4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-7B-Base/1762652580.56796", - "retrieved_timestamp": "1762652580.567961", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/Falcon3-7B-Base", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-7B-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34159474638403875 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5098880466426711 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19410876132930513 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3464765100671141 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47020833333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3910405585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-7B-Instruct/ed988bd0-76b0-4ab6-9c9e-5a5e0aefb936.json 
b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-7B-Instruct/ed988bd0-76b0-4ab6-9c9e-5a5e0aefb936.json deleted file mode 100644 index dc4044c6e534805d353152710869604748b52026..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-7B-Instruct/ed988bd0-76b0-4ab6-9c9e-5a5e0aefb936.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-7B-Instruct/1762652580.568164", - "retrieved_timestamp": "1762652580.568164", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/Falcon3-7B-Instruct", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7612479332615238 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.563244278519333 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4086102719033233 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48267708333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4087433510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.456 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-Mamba-7B-Base/766e6e63-5779-49cd-9e8c-2bc475c1356a.json b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-Mamba-7B-Base/766e6e63-5779-49cd-9e8c-2bc475c1356a.json deleted file mode 100644 index 51eb376fe2b618a24fb46b80b09b599298012053..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-Mamba-7B-Base/766e6e63-5779-49cd-9e8c-2bc475c1356a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-Mamba-7B-Base/1762652580.568367", - "retrieved_timestamp": "1762652580.5683682", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/Falcon3-Mamba-7B-Base", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-Mamba-7B-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28911288713945665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4699280188827039 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19410876132930513 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3431458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30377327127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "FalconMambaForCausalLM", - "params_billions": 7.273 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-Mamba-7B-Instruct/69491efc-0287-4288-bdf0-bcc57c53b94e.json b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-Mamba-7B-Instruct/69491efc-0287-4288-bdf0-bcc57c53b94e.json deleted file mode 100644 index 089c06dad7a62efcdd1f685945df38bbd83af9a0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_Falcon3-Mamba-7B-Instruct/69491efc-0287-4288-bdf0-bcc57c53b94e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_Falcon3-Mamba-7B-Instruct/1762652580.5685718", - "retrieved_timestamp": "1762652580.5685718", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/Falcon3-Mamba-7B-Instruct", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/Falcon3-Mamba-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7165099713205406 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4678957688410694 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30060422960725075 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38686458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3369348404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "FalconMambaForCausalLM", - "params_billions": 7.273 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-11B/705a1ff4-2e40-4827-af54-099870fac588.json b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-11B/705a1ff4-2e40-4827-af54-099870fac588.json deleted file mode 100644 index 63bee7ed2af07a1a082b9ab0b0d8e3bd8ea2add8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-11B/705a1ff4-2e40-4827-af54-099870fac588.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_falcon-11B/1762652580.568774", - "retrieved_timestamp": "1762652580.568774", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/falcon-11B", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/falcon-11B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3261324397044287 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43916370355493844 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - 
}, - "score_details": { - "score": 0.027945619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2709731543624161 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39864583333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23894614361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "FalconForCausalLM", - "params_billions": 11.103 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-40b-instruct/1d6f8802-e9aa-471c-8fbc-1cd807357ab5.json b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-40b-instruct/1d6f8802-e9aa-471c-8fbc-1cd807357ab5.json deleted file mode 100644 index bd4eed595052082b4a94a853c117112464631fc6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-40b-instruct/1d6f8802-e9aa-471c-8fbc-1cd807357ab5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_falcon-40b-instruct/1762652580.569173", - "retrieved_timestamp": "1762652580.569173", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/falcon-40b-instruct", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/falcon-40b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24544874266945038 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40538675151591974 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.019637462235649546 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37622916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": 
{ - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2261469414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "FalconForCausalLM", - "params_billions": 40.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-40b/cfdece82-631e-48b7-8232-91a8d9ccf65c.json b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-40b/cfdece82-631e-48b7-8232-91a8d9ccf65c.json deleted file mode 100644 index 27223a9df820d3ee23ca717663017069162baf07..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-40b/cfdece82-631e-48b7-8232-91a8d9ccf65c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_falcon-40b/1762652580.568969", - "retrieved_timestamp": "1762652580.56897", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/falcon-40b", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/falcon-40b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24964538535530173 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4018532495595801 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01812688821752266 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27348993288590606 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36314583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25049867021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "FalconForCausalLM", - "params_billions": 40.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-7b-instruct/2b84722f-58fc-421d-ae1a-9e21ac0b4080.json b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-7b-instruct/2b84722f-58fc-421d-ae1a-9e21ac0b4080.json deleted file mode 100644 index 
d618f253ef86b43eae0703a831e81656ef8511d1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-7b-instruct/2b84722f-58fc-421d-ae1a-9e21ac0b4080.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_falcon-7b-instruct/1762652580.5696268", - "retrieved_timestamp": "1762652580.5696268", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/falcon-7b-instruct", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/falcon-7b-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19688869976107837 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32034221512355765 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.012084592145015106 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3633645833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1155252659574468 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "FalconForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-7b/0e9837cb-4dda-4058-a89e-4127b5980eed.json b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-7b/0e9837cb-4dda-4058-a89e-4127b5980eed.json deleted file mode 100644 index e53ba593035a9c75d30e92c977a242bf3ba78a1a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-7b/0e9837cb-4dda-4058-a89e-4127b5980eed.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_falcon-7b/1762652580.5693781", - "retrieved_timestamp": "1762652580.569379", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": 
"Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/falcon-7b", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/falcon-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.182051401392749 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32852446117322215 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24496644295302014 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37784375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253324468085106 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "FalconForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-mamba-7b/9878c419-fff8-402a-a315-70864e5ae60c.json b/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-mamba-7b/9878c419-fff8-402a-a315-70864e5ae60c.json deleted file mode 100644 index 6ebba9a1c67a5cb2096afe2a74f84cfeb56dcd19..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tiiuae/tiiuae_falcon-mamba-7b/9878c419-fff8-402a-a315-70864e5ae60c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tiiuae_falcon-mamba-7b/1762652580.569833", - "retrieved_timestamp": "1762652580.569834", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tiiuae/falcon-mamba-7b", - "developer": "tiiuae", - "inference_platform": "unknown", - "id": "tiiuae/falcon-mamba-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3335760227307987 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4284854988604366 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0445619335347432 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3104026845637584 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42103124999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23021941489361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "FalconMambaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_BiBo-v0.3/d0907791-99ed-4c01-8df4-80ab6ecc906f.json b/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_BiBo-v0.3/d0907791-99ed-4c01-8df4-80ab6ecc906f.json deleted file mode 100644 index f04633a4c0a96133e3d3631f79ae56e152459ca7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_BiBo-v0.3/d0907791-99ed-4c01-8df4-80ab6ecc906f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tinycompany_BiBo-v0.3/1762652580.570036", - "retrieved_timestamp": "1762652580.570036", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tinycompany/BiBo-v0.3", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/BiBo-v0.3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5183989592060179 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4641611514377814 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - 
{ - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3949895833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29945146276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.943 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_BiBo-v0.7/8f186e60-a090-4b9e-9910-23054617fe57.json b/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_BiBo-v0.7/8f186e60-a090-4b9e-9910-23054617fe57.json deleted file mode 100644 index 384946d8a272bc337ff5ec2a7339f94b1a340af6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_BiBo-v0.7/8f186e60-a090-4b9e-9910-23054617fe57.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tinycompany_BiBo-v0.7/1762652580.570291", - "retrieved_timestamp": "1762652580.570291", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tinycompany/BiBo-v0.7", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/BiBo-v0.7" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3738181358794665 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43108167584271034 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0823262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40441666666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2650432180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 
2.943 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_ShawtyIsBad-bgem3/ebf9067a-9836-4152-aa62-3ecbbc2459dc.json b/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_ShawtyIsBad-bgem3/ebf9067a-9836-4152-aa62-3ecbbc2459dc.json deleted file mode 100644 index eda1262c9f85a77046e2f952109c28a20d664b30..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_ShawtyIsBad-bgem3/ebf9067a-9836-4152-aa62-3ecbbc2459dc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-bgem3/1762652580.570496", - "retrieved_timestamp": "1762652580.570497", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tinycompany/ShawtyIsBad-bgem3", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/ShawtyIsBad-bgem3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2608113139802391 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38529707856388956 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04833836858006042 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36946875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25831117021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.436 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_ShawtyIsBad-e5-large/e8fe4b10-f6f3-4036-a3d9-77b8d28822ae.json b/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_ShawtyIsBad-e5-large/e8fe4b10-f6f3-4036-a3d9-77b8d28822ae.json deleted file mode 100644 index d5583fd7b63bbc42f539f8b59d003420d8924a63..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_ShawtyIsBad-e5-large/e8fe4b10-f6f3-4036-a3d9-77b8d28822ae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-e5-large/1762652580.5709078", - "retrieved_timestamp": "1762652580.570912", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tinycompany/ShawtyIsBad-e5-large", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/ShawtyIsBad-e5-large" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24682287441765627 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873483842947396 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.045317220543806644 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37204166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25689827127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.436 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_ShawtyIsBad-ib/e2514850-3847-4fe7-abd8-240762ba507a.json b/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_ShawtyIsBad-ib/e2514850-3847-4fe7-abd8-240762ba507a.json deleted file mode 100644 index 301ea3925eedc5bcfaedbc1d53b20ee3b4c1d0bd..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_ShawtyIsBad-ib/e2514850-3847-4fe7-abd8-240762ba507a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-ib/1762652580.571291", - "retrieved_timestamp": "1762652580.571292", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tinycompany/ShawtyIsBad-ib", - "developer": "tinycompany", - "inference_platform": 
"unknown", - "id": "tinycompany/ShawtyIsBad-ib" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2565149359255664 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3880457874839807 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3641041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.258061835106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.436 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_ShawtyIsBad-nomic-moe/7896d77a-e4c3-431b-9490-26d88664385b.json b/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_ShawtyIsBad-nomic-moe/7896d77a-e4c3-431b-9490-26d88664385b.json deleted file mode 100644 index 01b2ec0230517fd1ac52d73d21e9aff501d3d031..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_ShawtyIsBad-nomic-moe/7896d77a-e4c3-431b-9490-26d88664385b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-nomic-moe/1762652580.571543", - "retrieved_timestamp": "1762652580.5715442", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tinycompany/ShawtyIsBad-nomic-moe", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/ShawtyIsBad-nomic-moe" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2607614462958284 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.3878019225656597 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3070469798657718 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37470833333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2572307180851064 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.436 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_ShawtyIsBad-nomic1.5/cbda0920-b298-4db2-806d-65b7d6550b30.json b/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_ShawtyIsBad-nomic1.5/cbda0920-b298-4db2-806d-65b7d6550b30.json deleted file mode 100644 index 78de145c9a9152eddcb6b316e0657f47d23c413b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_ShawtyIsBad-nomic1.5/cbda0920-b298-4db2-806d-65b7d6550b30.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tinycompany_ShawtyIsBad-nomic1.5/1762652580.571785", - "retrieved_timestamp": "1762652580.571787", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tinycompany/ShawtyIsBad-nomic1.5", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/ShawtyIsBad-nomic1.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2543916807404354 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3873599493472512 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.311241610738255 - } - }, - { - 
"evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36283333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25673204787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.436 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-base/e523d43e-a198-4db5-9d91-c4959b136953.json b/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-base/e523d43e-a198-4db5-9d91-c4959b136953.json deleted file mode 100644 index 831b4ab9b6b9185998ef4ebf92c390a6692d2034..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-base/e523d43e-a198-4db5-9d91-c4959b136953.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-base/1762652580.5720189", - "retrieved_timestamp": "1762652580.57202", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tinycompany/SigmaBoi-base", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/SigmaBoi-base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24469961923252526 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4314363391906919 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07779456193353475 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43427083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2816655585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", 
- "params_billions": 2.943 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-bge-m3/383b2f80-774b-4f76-998a-9d3d20a265db.json b/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-bge-m3/383b2f80-774b-4f76-998a-9d3d20a265db.json deleted file mode 100644 index 8db2748bfd5b9a224469f341580723501179cbc7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-bge-m3/383b2f80-774b-4f76-998a-9d3d20a265db.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-bge-m3/1762652580.572246", - "retrieved_timestamp": "1762652580.572247", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tinycompany/SigmaBoi-bge-m3", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/SigmaBoi-bge-m3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24502431326657714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43509173985964184 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4383020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28191489361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.943 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-bgem3/2b84e1be-81f6-474e-be5b-c5f4e60167fe.json b/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-bgem3/2b84e1be-81f6-474e-be5b-c5f4e60167fe.json deleted file mode 100644 index 5ad7537770344c4020fd25ea82997c7687a93542..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-bgem3/2b84e1be-81f6-474e-be5b-c5f4e60167fe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-bgem3/1762652580.572469", - "retrieved_timestamp": "1762652580.57247", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tinycompany/SigmaBoi-bgem3", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/SigmaBoi-bgem3" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24502431326657714 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43509173985964184 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07628398791540786 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4383020833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28191489361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.943 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-ib/55c0df8c-8dba-4508-8fe3-6ee726fa8a44.json b/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-ib/55c0df8c-8dba-4508-8fe3-6ee726fa8a44.json deleted file mode 100644 index 7bd25328eda9ab30eec0eb43a67f7f6e8ae7cc95..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-ib/55c0df8c-8dba-4508-8fe3-6ee726fa8a44.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-ib/1762652580.572692", - "retrieved_timestamp": "1762652580.572693", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tinycompany/SigmaBoi-ib", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": 
"tinycompany/SigmaBoi-ib" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24774708883540117 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4343622024096135 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07401812688821752 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42896874999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2824135638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.943 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-nomic-moe/2dff318a-f64f-407b-acd3-2b1020d3f5cd.json b/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-nomic-moe/2dff318a-f64f-407b-acd3-2b1020d3f5cd.json deleted file mode 100644 index f9d43f336bae65b65dc6be5c6824b0fa0f6e3ff6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-nomic-moe/2dff318a-f64f-407b-acd3-2b1020d3f5cd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-nomic-moe/1762652580.57291", - "retrieved_timestamp": "1762652580.572911", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tinycompany/SigmaBoi-nomic-moe", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/SigmaBoi-nomic-moe" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2474223948013493 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43341835214223373 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29278523489932884 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43163541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28366023936170215 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.943 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-nomic1.5-fp32/39b85f29-d449-40d6-bb0e-cb4790a47cc7.json b/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-nomic1.5-fp32/39b85f29-d449-40d6-bb0e-cb4790a47cc7.json deleted file mode 100644 index b4427cd31bba43ac210dbbd6a55427c79f4980d7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-nomic1.5-fp32/39b85f29-d449-40d6-bb0e-cb4790a47cc7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-nomic1.5-fp32/1762652580.573416", - "retrieved_timestamp": "1762652580.573416", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tinycompany/SigmaBoi-nomic1.5-fp32", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/SigmaBoi-nomic1.5-fp32" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24622335403396323 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43705348265770266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": 
"MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4316041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28407579787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.943 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-nomic1.5/9ff57503-4fc4-4d21-8899-d691c912bff9.json b/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-nomic1.5/9ff57503-4fc4-4d21-8899-d691c912bff9.json deleted file mode 100644 index 558f893036c0d3063f0421bc76df5d26074e71fe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_SigmaBoi-nomic1.5/9ff57503-4fc4-4d21-8899-d691c912bff9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tinycompany_SigmaBoi-nomic1.5/1762652580.5731819", - "retrieved_timestamp": "1762652580.5731819", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tinycompany/SigmaBoi-nomic1.5", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/SigmaBoi-nomic1.5" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24469961923252526 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43705348265770266 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4316041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28407579787234044 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"Qwen2ForCausalLM", - "params_billions": 2.943 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_Tamed-Shawty/6d2370ea-55ab-4ae7-a11a-c1556e988349.json b/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_Tamed-Shawty/6d2370ea-55ab-4ae7-a11a-c1556e988349.json deleted file mode 100644 index f0ee696d90f36097a9beb67404ac227e1b277464..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tinycompany/tinycompany_Tamed-Shawty/6d2370ea-55ab-4ae7-a11a-c1556e988349.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tinycompany_Tamed-Shawty/1762652580.573629", - "retrieved_timestamp": "1762652580.573629", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tinycompany/Tamed-Shawty", - "developer": "tinycompany", - "inference_platform": "unknown", - "id": "tinycompany/Tamed-Shawty" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38308576798450333 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3837059588999942 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2625838926174497 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35009375000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2601396276595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.562 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tklohj/tklohj_WindyFloLLM/53f0c477-6f06-427a-be34-5b0131cbf9e1.json b/leaderboard_data/HFOpenLLMv2/tklohj/tklohj_WindyFloLLM/53f0c477-6f06-427a-be34-5b0131cbf9e1.json deleted file mode 100644 index e1a790ca3e14bc4f2321c330e51b379cd24af853..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tklohj/tklohj_WindyFloLLM/53f0c477-6f06-427a-be34-5b0131cbf9e1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/tklohj_WindyFloLLM/1762652580.573854", - "retrieved_timestamp": "1762652580.573855", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tklohj/WindyFloLLM", - "developer": "tklohj", - "inference_platform": "unknown", - "id": "tklohj/WindyFloLLM" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26685638550158025 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4636616007058791 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2751677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4253125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25814494680851063 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.016 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_GPT-NeoXT-Chat-Base-20B/3b5ca740-a1e5-4043-ad56-c772bbdd1b38.json b/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_GPT-NeoXT-Chat-Base-20B/3b5ca740-a1e5-4043-ad56-c772bbdd1b38.json deleted file mode 100644 index 8f472e831b2f40ae41cbb1abc7a444dc4371f5aa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_GPT-NeoXT-Chat-Base-20B/3b5ca740-a1e5-4043-ad56-c772bbdd1b38.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/togethercomputer_GPT-NeoXT-Chat-Base-20B/1762652580.574344", - "retrieved_timestamp": "1762652580.5743449", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "togethercomputer/GPT-NeoXT-Chat-Base-20B", - "developer": "togethercomputer", - 
"inference_platform": "unknown", - "id": "togethercomputer/GPT-NeoXT-Chat-Base-20B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.18297561581049393 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33209702572173033 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.023413897280966767 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3460625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11452792553191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 20.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_Llama-2-7B-32K-Instruct/a1609dba-826b-4246-9230-35bd68268fe4.json b/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_Llama-2-7B-32K-Instruct/a1609dba-826b-4246-9230-35bd68268fe4.json deleted file mode 100644 index 85f257de2c2a20dea871d6e8740c4fe6c7c7f71b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_Llama-2-7B-32K-Instruct/a1609dba-826b-4246-9230-35bd68268fe4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/togethercomputer_Llama-2-7B-32K-Instruct/1762652580.574983", - "retrieved_timestamp": "1762652580.5749838", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "togethercomputer/Llama-2-7B-32K-Instruct", - "developer": "togethercomputer", - "inference_platform": "unknown", - "id": "togethercomputer/Llama-2-7B-32K-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2130003945087922 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34434724239927544 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2516778523489933 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40559375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17810837765957446 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-7B-Base/8d69f711-74c9-4c1e-87dc-9b46f70674bb.json b/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-7B-Base/8d69f711-74c9-4c1e-87dc-9b46f70674bb.json deleted file mode 100644 index 633c68ef427fbffe058ecbec4612b8863a21bb41..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-7B-Base/8d69f711-74c9-4c1e-87dc-9b46f70674bb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-7B-Base/1762652580.5751948", - "retrieved_timestamp": "1762652580.5751958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "togethercomputer/RedPajama-INCITE-7B-Base", - "developer": "togethercomputer", - "inference_platform": "unknown", - "id": "togethercomputer/RedPajama-INCITE-7B-Base" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20822971936683554 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31948898765013445 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.015861027190332326 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36199999999999993 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1196808510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-7B-Chat/c3b6efec-5428-499f-8e6b-e3b2b87a0d15.json b/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-7B-Chat/c3b6efec-5428-499f-8e6b-e3b2b87a0d15.json deleted file mode 100644 index 2e00556867dc5c07044969d7a45045f83043d642..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-7B-Chat/c3b6efec-5428-499f-8e6b-e3b2b87a0d15.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-7B-Chat/1762652580.57541", - "retrieved_timestamp": "1762652580.5754108", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "togethercomputer/RedPajama-INCITE-7B-Chat", - "developer": "togethercomputer", - "inference_platform": "unknown", - "id": "togethercomputer/RedPajama-INCITE-7B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1557977278066641 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3175449328457368 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.006797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2525167785234899 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3447604166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": 
"Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11211768617021277 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-7B-Instruct/d8cef007-51ab-4793-9a74-d9f29d6c0f27.json b/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-7B-Instruct/d8cef007-51ab-4793-9a74-d9f29d6c0f27.json deleted file mode 100644 index 720cd312be219fa0ff9df965f8ca47a4f1bf8ab1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-7B-Instruct/d8cef007-51ab-4793-9a74-d9f29d6c0f27.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-7B-Instruct/1762652580.57568", - "retrieved_timestamp": "1762652580.575681", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "togethercomputer/RedPajama-INCITE-7B-Instruct", - "developer": "togethercomputer", - "inference_platform": "unknown", - "id": "togethercomputer/RedPajama-INCITE-7B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2055069437980115 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.337743947089799 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25083892617449666 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3685104166666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1272440159574468 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 7.0 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-Base-3B-v1/ba5c73b3-4785-44ef-8bfb-cfbbbdc16a91.json b/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-Base-3B-v1/ba5c73b3-4785-44ef-8bfb-cfbbbdc16a91.json deleted file mode 100644 index 826d547fa5d9fe09fdeac506fee7365a38d8edf6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-Base-3B-v1/ba5c73b3-4785-44ef-8bfb-cfbbbdc16a91.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-Base-3B-v1/1762652580.575899", - "retrieved_timestamp": "1762652580.5758998", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "togethercomputer/RedPajama-INCITE-Base-3B-v1", - "developer": "togethercomputer", - "inference_platform": "unknown", - "id": "togethercomputer/RedPajama-INCITE-Base-3B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22936253584932426 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3060403878987615 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24328859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37387499999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11112034574468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 3.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-Chat-3B-v1/9a0e6d99-4f86-4ce8-9b5a-f7b6c0fbd710.json b/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-Chat-3B-v1/9a0e6d99-4f86-4ce8-9b5a-f7b6c0fbd710.json deleted file mode 100644 index e238a27d54ee66f6473e7b5eff3ba01c5dcf5793..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-Chat-3B-v1/9a0e6d99-4f86-4ce8-9b5a-f7b6c0fbd710.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-Chat-3B-v1/1762652580.5763452", - "retrieved_timestamp": "1762652580.5763478", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", - "developer": "togethercomputer", - "inference_platform": "unknown", - "id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16521496296493304 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32166937119202416 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24412751677852348 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3684479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11269946808510638 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 3.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-Instruct-3B-v1/e78a3888-33c7-4264-a01e-b0661504322f.json b/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-Instruct-3B-v1/e78a3888-33c7-4264-a01e-b0661504322f.json deleted file mode 100644 index 15d8d477b45e32df58617754ed3c61320bb01a5f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/togethercomputer/togethercomputer_RedPajama-INCITE-Instruct-3B-v1/e78a3888-33c7-4264-a01e-b0661504322f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/togethercomputer_RedPajama-INCITE-Instruct-3B-v1/1762652580.576687", - "retrieved_timestamp": "1762652580.576688", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - 
"evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1", - "developer": "togethercomputer", - "inference_platform": "unknown", - "id": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2124263620526869 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3146017752057237 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24748322147651006 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38860416666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11095412234042554 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GPTNeoXForCausalLM", - "params_billions": 3.0 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tokyotech-llm/tokyotech-llm_Llama-3-Swallow-8B-Instruct-v0.1/f6729e0a-559f-4087-af75-37634bf0af62.json b/leaderboard_data/HFOpenLLMv2/tokyotech-llm/tokyotech-llm_Llama-3-Swallow-8B-Instruct-v0.1/f6729e0a-559f-4087-af75-37634bf0af62.json deleted file mode 100644 index 83ba80162624e3bb04defea53bcd8d57b4928680..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tokyotech-llm/tokyotech-llm_Llama-3-Swallow-8B-Instruct-v0.1/f6729e0a-559f-4087-af75-37634bf0af62.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tokyotech-llm_Llama-3-Swallow-8B-Instruct-v0.1/1762652580.5769222", - "retrieved_timestamp": "1762652580.576923", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1", - "developer": "tokyotech-llm", - "inference_platform": "unknown", - "id": "tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1" - }, - "evaluation_results": [ - { - 
"evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5507719517546776 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5009389976232003 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43569791666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087599734042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tomasmcm/tomasmcm_sky-t1-coder-32b-flash/1229310f-22aa-4ef9-b354-71fa249569f7.json b/leaderboard_data/HFOpenLLMv2/tomasmcm/tomasmcm_sky-t1-coder-32b-flash/1229310f-22aa-4ef9-b354-71fa249569f7.json deleted file mode 100644 index 049f72d3fbf6e12b3cb3e07acfabe322074fe60e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tomasmcm/tomasmcm_sky-t1-coder-32b-flash/1229310f-22aa-4ef9-b354-71fa249569f7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tomasmcm_sky-t1-coder-32b-flash/1762652580.577295", - "retrieved_timestamp": "1762652580.5772958", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tomasmcm/sky-t1-coder-32b-flash", - "developer": "tomasmcm", - "inference_platform": "unknown", - "id": "tomasmcm/sky-t1-coder-32b-flash" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7780090160773414 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6822440044314982 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5422960725075529 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36828859060402686 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4232708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5782081117021277 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/tugstugi/tugstugi_Qwen2.5-7B-Instruct-QwQ-v0.1/1cfb7d70-b903-48ae-bdb2-31c838bdabc8.json b/leaderboard_data/HFOpenLLMv2/tugstugi/tugstugi_Qwen2.5-7B-Instruct-QwQ-v0.1/1cfb7d70-b903-48ae-bdb2-31c838bdabc8.json deleted file mode 100644 index 0de6b10eac10cf871e9420dffdf1bf77c55681e4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/tugstugi/tugstugi_Qwen2.5-7B-Instruct-QwQ-v0.1/1cfb7d70-b903-48ae-bdb2-31c838bdabc8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/tugstugi_Qwen2.5-7B-Instruct-QwQ-v0.1/1762652580.577852", - "retrieved_timestamp": "1762652580.577852", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1", - "developer": "tugstugi", - "inference_platform": "unknown", - "id": "tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6017300761978217 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5101062293388118 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3814199395770393 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2684563758389262 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": 
"Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3794270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4080784574468085 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/unsloth/unsloth_Llama-3.2-1B-Instruct-no-system-message/d8d52ed0-2eb6-4be3-9e4e-346a6b19ceca.json b/leaderboard_data/HFOpenLLMv2/unsloth/unsloth_Llama-3.2-1B-Instruct-no-system-message/d8d52ed0-2eb6-4be3-9e4e-346a6b19ceca.json deleted file mode 100644 index 18d8709df38700cd2e549d9f5c14c73a1498c6c5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/unsloth/unsloth_Llama-3.2-1B-Instruct-no-system-message/d8d52ed0-2eb6-4be3-9e4e-346a6b19ceca.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/unsloth_Llama-3.2-1B-Instruct-no-system-message/1762652580.578731", - "retrieved_timestamp": "1762652580.578733", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "unsloth/Llama-3.2-1B-Instruct-no-system-message", - "developer": "unsloth", - "inference_platform": "unknown", - "id": "unsloth/Llama-3.2-1B-Instruct-no-system-message" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5649853499824908 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3543744783345775 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2726510067114094 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3340625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1668882978723404 - } - } - ], - "additional_details": { - "precision": "bfloat16", - 
"architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/unsloth/unsloth_Llama-3.2-1B-Instruct/25ec2dbd-465f-40a9-80f0-e4001e621303.json b/leaderboard_data/HFOpenLLMv2/unsloth/unsloth_Llama-3.2-1B-Instruct/25ec2dbd-465f-40a9-80f0-e4001e621303.json deleted file mode 100644 index c6255d68a358101a32bcd08fbf5f721e95748c6b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/unsloth/unsloth_Llama-3.2-1B-Instruct/25ec2dbd-465f-40a9-80f0-e4001e621303.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/unsloth_Llama-3.2-1B-Instruct/1762652580.578335", - "retrieved_timestamp": "1762652580.578335", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "unsloth/Llama-3.2-1B-Instruct", - "developer": "unsloth", - "inference_platform": "unknown", - "id": "unsloth/Llama-3.2-1B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5809973093613834 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34847036874553655 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0823262839879154 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3196145833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17420212765957446 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.236 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/unsloth/unsloth_Phi-3-mini-4k-instruct/36d52065-1de2-4661-bf23-85276a8ede2f.json b/leaderboard_data/HFOpenLLMv2/unsloth/unsloth_Phi-3-mini-4k-instruct/36d52065-1de2-4661-bf23-85276a8ede2f.json deleted file mode 100644 index 109184ec378d620bab3ff79736531555d2d23854..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/unsloth/unsloth_Phi-3-mini-4k-instruct/36d52065-1de2-4661-bf23-85276a8ede2f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/unsloth_Phi-3-mini-4k-instruct/1762652580.579097", - "retrieved_timestamp": "1762652580.5790982", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "unsloth/Phi-3-mini-4k-instruct", - "developer": "unsloth", - "inference_platform": "unknown", - "id": "unsloth/Phi-3-mini-4k-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.544027624480822 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5500239467441027 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16389728096676737 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32298657718120805 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42841666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4030917553191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/upstage/upstage_SOLAR-10.7B-Instruct-v1.0/9d750c83-0b27-437b-ae33-dd21a3313a04.json b/leaderboard_data/HFOpenLLMv2/upstage/upstage_SOLAR-10.7B-Instruct-v1.0/9d750c83-0b27-437b-ae33-dd21a3313a04.json deleted file mode 100644 index 2a15ca6968468f2d4d134e3ce3bd5faee2a23229..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/upstage/upstage_SOLAR-10.7B-Instruct-v1.0/9d750c83-0b27-437b-ae33-dd21a3313a04.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/upstage_SOLAR-10.7B-Instruct-v1.0/1762652580.580213", - "retrieved_timestamp": "1762652580.58022", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "upstage/SOLAR-10.7B-Instruct-v1.0", - "developer": 
"upstage", - "inference_platform": "unknown", - "id": "upstage/SOLAR-10.7B-Instruct-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4736609972650345 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5162494941446991 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3899375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31382978723404253 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/upstage/upstage_SOLAR-10.7B-v1.0/b29dbad1-7c1c-4ed2-8f44-45d54fed4880.json b/leaderboard_data/HFOpenLLMv2/upstage/upstage_SOLAR-10.7B-v1.0/b29dbad1-7c1c-4ed2-8f44-45d54fed4880.json deleted file mode 100644 index 89a50d0c9c8179c842b674c9ce64408728ed3e3e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/upstage/upstage_SOLAR-10.7B-v1.0/b29dbad1-7c1c-4ed2-8f44-45d54fed4880.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/upstage_SOLAR-10.7B-v1.0/1762652580.5805068", - "retrieved_timestamp": "1762652580.580508", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "upstage/SOLAR-10.7B-v1.0", - "developer": "upstage", - "inference_platform": "unknown", - "id": "upstage/SOLAR-10.7B-v1.0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24212644671693329 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5093873084711799 - } - }, - { - 
"evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.026435045317220542 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28104026845637586 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43715624999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3400099734042553 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/upstage/upstage_solar-pro-preview-instruct/00398bb3-0c84-4b3b-bcf1-61e84313b3e3.json b/leaderboard_data/HFOpenLLMv2/upstage/upstage_solar-pro-preview-instruct/00398bb3-0c84-4b3b-bcf1-61e84313b3e3.json deleted file mode 100644 index cfccb749e24f24ab4a45f851d9ca0871303fa6ed..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/upstage/upstage_solar-pro-preview-instruct/00398bb3-0c84-4b3b-bcf1-61e84313b3e3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/upstage_solar-pro-preview-instruct/1762652580.5807302", - "retrieved_timestamp": "1762652580.580731", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "upstage/solar-pro-preview-instruct", - "developer": "upstage", - "inference_platform": "unknown", - "id": "upstage/solar-pro-preview-instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8415814483348626 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6816843051379534 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22054380664652568 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37080536912751677 - } - }, - { - "evaluation_name": "MUSR", - 
"metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44165625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.52734375 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "SolarForCausalLM", - "params_billions": 22.14 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/utkmst/utkmst_chimera-beta-test2-lora-merged/00620da3-d3ee-442a-a319-248906d959c0.json b/leaderboard_data/HFOpenLLMv2/utkmst/utkmst_chimera-beta-test2-lora-merged/00620da3-d3ee-442a-a319-248906d959c0.json deleted file mode 100644 index b69fc790f4307779558738f4627eacf078b425aa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/utkmst/utkmst_chimera-beta-test2-lora-merged/00620da3-d3ee-442a-a319-248906d959c0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/utkmst_chimera-beta-test2-lora-merged/1762652580.581129", - "retrieved_timestamp": "1762652580.581131", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "utkmst/chimera-beta-test2-lora-merged", - "developer": "utkmst", - "inference_platform": "unknown", - "id": "utkmst/chimera-beta-test2-lora-merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6054269338688014 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47957156724192185 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09516616314199396 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3036912751677852 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4117916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2992021276595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/uukuguy/uukuguy_speechless-coder-ds-6.7b/a3ba5a65-b137-42ad-868b-9aa5c24afd07.json b/leaderboard_data/HFOpenLLMv2/uukuguy/uukuguy_speechless-coder-ds-6.7b/a3ba5a65-b137-42ad-868b-9aa5c24afd07.json deleted file mode 100644 index 85b5877b26caf98e1db36fce55c489f24ef1cd9f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/uukuguy/uukuguy_speechless-coder-ds-6.7b/a3ba5a65-b137-42ad-868b-9aa5c24afd07.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/uukuguy_speechless-coder-ds-6.7b/1762652580.582827", - "retrieved_timestamp": "1762652580.582828", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "uukuguy/speechless-coder-ds-6.7b", - "developer": "uukuguy", - "inference_platform": "unknown", - "id": "uukuguy/speechless-coder-ds-6.7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25046986440422525 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4036373344669979 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.021148036253776436 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3819375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.171875 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 6.7 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/uukuguy/uukuguy_speechless-instruct-mistral-7b-v0.2/e115938d-d343-4c03-8f3b-4d86768b2e49.json b/leaderboard_data/HFOpenLLMv2/uukuguy/uukuguy_speechless-instruct-mistral-7b-v0.2/e115938d-d343-4c03-8f3b-4d86768b2e49.json deleted file mode 100644 index b390bcb75d9014d33a99799295053587a55103af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/uukuguy/uukuguy_speechless-instruct-mistral-7b-v0.2/e115938d-d343-4c03-8f3b-4d86768b2e49.json +++ /dev/null @@ -1,107 
+0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/uukuguy_speechless-instruct-mistral-7b-v0.2/1762652580.5831082", - "retrieved_timestamp": "1762652580.5831091", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "uukuguy/speechless-instruct-mistral-7b-v0.2", - "developer": "uukuguy", - "inference_platform": "unknown", - "id": "uukuguy/speechless-instruct-mistral-7b-v0.2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3261324397044287 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4606667950681749 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28187919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4901770833333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2902260638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/uukuguy/uukuguy_speechless-zephyr-code-functionary-7b/82346a60-f31e-45ba-9fae-bd738321f390.json b/leaderboard_data/HFOpenLLMv2/uukuguy/uukuguy_speechless-zephyr-code-functionary-7b/82346a60-f31e-45ba-9fae-bd738321f390.json deleted file mode 100644 index bf31d4cffe85f1bb0aa1321f87063aca09555417..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/uukuguy/uukuguy_speechless-zephyr-code-functionary-7b/82346a60-f31e-45ba-9fae-bd738321f390.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/uukuguy_speechless-zephyr-code-functionary-7b/1762652580.583915", - "retrieved_timestamp": "1762652580.583916", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": 
"third_party" - }, - "model_info": { - "name": "uukuguy/speechless-zephyr-code-functionary-7b", - "developer": "uukuguy", - "inference_platform": "unknown", - "id": "uukuguy/speechless-zephyr-code-functionary-7b" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2695791610704043 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46642753957194555 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04229607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30033557046979864 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4267708333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3094248670212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/v000000/v000000_L3-8B-Stheno-v3.2-abliterated/33146dbb-8233-4f3d-9fd9-68cbacc3f293.json b/leaderboard_data/HFOpenLLMv2/v000000/v000000_L3-8B-Stheno-v3.2-abliterated/33146dbb-8233-4f3d-9fd9-68cbacc3f293.json deleted file mode 100644 index 805a3a1e4fec6f714a53cda4632f7204113e97e5..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/v000000/v000000_L3-8B-Stheno-v3.2-abliterated/33146dbb-8233-4f3d-9fd9-68cbacc3f293.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/v000000_L3-8B-Stheno-v3.2-abliterated/1762652580.584157", - "retrieved_timestamp": "1762652580.584158", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "v000000/L3-8B-Stheno-v3.2-abliterated", - "developer": "v000000", - "inference_platform": "unknown", - "id": "v000000/L3-8B-Stheno-v3.2-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6717720093795574 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { 
- "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141439214918061 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06948640483383686 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30956375838926176 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36196875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3603723404255319 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/v000000/v000000_L3.1-Niitorm-8B-DPO-t0.0001/d90cef97-1e73-4068-bcb5-260a3f2586fe.json b/leaderboard_data/HFOpenLLMv2/v000000/v000000_L3.1-Niitorm-8B-DPO-t0.0001/d90cef97-1e73-4068-bcb5-260a3f2586fe.json deleted file mode 100644 index 10eab44a688d64ff915726f33b76f4384a7a3290..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/v000000/v000000_L3.1-Niitorm-8B-DPO-t0.0001/d90cef97-1e73-4068-bcb5-260a3f2586fe.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/v000000_L3.1-Niitorm-8B-DPO-t0.0001/1762652580.5844421", - "retrieved_timestamp": "1762652580.5844429", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "v000000/L3.1-Niitorm-8B-DPO-t0.0001", - "developer": "v000000", - "inference_platform": "unknown", - "id": "v000000/L3.1-Niitorm-8B-DPO-t0.0001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7688666072687137 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5134234526726582 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1623867069486405 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on 
GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3879791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38663563829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/v000000/v000000_L3.1-Storniitova-8B/761f0cc0-c202-490d-93b4-447244f1e40a.json b/leaderboard_data/HFOpenLLMv2/v000000/v000000_L3.1-Storniitova-8B/761f0cc0-c202-490d-93b4-447244f1e40a.json deleted file mode 100644 index 47de662a95dd75a08507262e160ad0a652c6461a..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/v000000/v000000_L3.1-Storniitova-8B/761f0cc0-c202-490d-93b4-447244f1e40a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/v000000_L3.1-Storniitova-8B/1762652580.584696", - "retrieved_timestamp": "1762652580.584697", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "v000000/L3.1-Storniitova-8B", - "developer": "v000000", - "inference_platform": "unknown", - "id": "v000000/L3.1-Storniitova-8B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7816560060639104 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5151452004311876 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14652567975830816 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4028958333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.37757646276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/v000000/v000000_Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/1f1da15c-3a82-4dfb-9b73-4381c70eb1ef.json b/leaderboard_data/HFOpenLLMv2/v000000/v000000_Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/1f1da15c-3a82-4dfb-9b73-4381c70eb1ef.json deleted file mode 100644 index 3a5dfaea1cb682942d3aed30d0ef6eb6afacb436..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/v000000/v000000_Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/1f1da15c-3a82-4dfb-9b73-4381c70eb1ef.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/v000000_Qwen2.5-14B-Gutenberg-Instruct-Slerpeno/1762652580.585153", - "retrieved_timestamp": "1762652580.585153", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno", - "developer": "v000000", - "inference_platform": "unknown", - "id": "v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8197493760998595 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.639010174859259 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5324773413897281 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3313758389261745 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4113645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4923537234042553 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vhab10/vhab10_Llama-3.1-8B-Base-Instruct-SLERP/982455a4-fb4f-4eed-96a0-c46d9eb11937.json b/leaderboard_data/HFOpenLLMv2/vhab10/vhab10_Llama-3.1-8B-Base-Instruct-SLERP/982455a4-fb4f-4eed-96a0-c46d9eb11937.json deleted file mode 
100644 index 55d56e91fa333d3c1700fcd129cbe494f9be5604..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vhab10/vhab10_Llama-3.1-8B-Base-Instruct-SLERP/982455a4-fb4f-4eed-96a0-c46d9eb11937.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vhab10_Llama-3.1-8B-Base-Instruct-SLERP/1762652580.585581", - "retrieved_timestamp": "1762652580.585582", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vhab10/Llama-3.1-8B-Base-Instruct-SLERP", - "developer": "vhab10", - "inference_platform": "unknown", - "id": "vhab10/Llama-3.1-8B-Base-Instruct-SLERP" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.290711977552893 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5057443268070797 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.12009063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2961409395973154 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40106250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3621176861702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vhab10/vhab10_Llama-3.2-Instruct-3B-TIES/22f8bb3f-4794-46b1-828e-75711a1233bd.json b/leaderboard_data/HFOpenLLMv2/vhab10/vhab10_Llama-3.2-Instruct-3B-TIES/22f8bb3f-4794-46b1-828e-75711a1233bd.json deleted file mode 100644 index 289c1b35db7909d86ceb5c6f6cff7728843f3b08..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vhab10/vhab10_Llama-3.2-Instruct-3B-TIES/22f8bb3f-4794-46b1-828e-75711a1233bd.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vhab10_Llama-3.2-Instruct-3B-TIES/1762652580.585841", - "retrieved_timestamp": "1762652580.585842", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - 
"evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vhab10/Llama-3.2-Instruct-3B-TIES", - "developer": "vhab10", - "inference_platform": "unknown", - "id": "vhab10/Llama-3.2-Instruct-3B-TIES" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4727367828472896 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43323649966514094 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34965625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2915558510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.848 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_CarbonBeagle-11B-truthy/d67aa278-fcc9-4404-a87a-4be9e1bdaa1a.json b/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_CarbonBeagle-11B-truthy/d67aa278-fcc9-4404-a87a-4be9e1bdaa1a.json deleted file mode 100644 index 34a84e3d842be670cc7cc094a55fd7fc953e321c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_CarbonBeagle-11B-truthy/d67aa278-fcc9-4404-a87a-4be9e1bdaa1a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vicgalle_CarbonBeagle-11B-truthy/1762652580.586528", - "retrieved_timestamp": "1762652580.586528", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vicgalle/CarbonBeagle-11B-truthy", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/CarbonBeagle-11B-truthy" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.5212214701436633 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5348420085288232 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04909365558912387 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29949664429530204 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37396874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.335688164893617 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_CarbonBeagle-11B/b906411a-6663-4c9f-9fe6-4d60e99e4e41.json b/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_CarbonBeagle-11B/b906411a-6663-4c9f-9fe6-4d60e99e4e41.json deleted file mode 100644 index 90b820a9d491e0f9afea642f877212165c772982..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_CarbonBeagle-11B/b906411a-6663-4c9f-9fe6-4d60e99e4e41.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vicgalle_CarbonBeagle-11B/1762652580.5862951", - "retrieved_timestamp": "1762652580.5862951", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vicgalle/CarbonBeagle-11B", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/CarbonBeagle-11B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5415298075772285 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5293652486530874 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.061933534743202415 - } - }, - { - 
"evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40203125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32762632978723405 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_Configurable-Llama-3.1-8B-Instruct/82a3253a-7a6e-4d75-8ea2-114b4dee6d16.json b/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_Configurable-Llama-3.1-8B-Instruct/82a3253a-7a6e-4d75-8ea2-114b4dee6d16.json deleted file mode 100644 index 88dc727ded99208278bb99b9178430c85747ba4e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_Configurable-Llama-3.1-8B-Instruct/82a3253a-7a6e-4d75-8ea2-114b4dee6d16.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vicgalle_Configurable-Llama-3.1-8B-Instruct/1762652580.586963", - "retrieved_timestamp": "1762652580.586964", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vicgalle/Configurable-Llama-3.1-8B-Instruct", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/Configurable-Llama-3.1-8B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8312399987588488 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5044756225072481 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1729607250755287 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3845416666666666 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3592087765957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_Configurable-Yi-1.5-9B-Chat/0a933130-dca9-435c-a529-16065b540aab.json b/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_Configurable-Yi-1.5-9B-Chat/0a933130-dca9-435c-a529-16065b540aab.json deleted file mode 100644 index 790070bfcae4bfe38f02d96639ba8079658a7f57..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_Configurable-Yi-1.5-9B-Chat/0a933130-dca9-435c-a529-16065b540aab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vicgalle_Configurable-Yi-1.5-9B-Chat/1762652580.587164", - "retrieved_timestamp": "1762652580.5871649", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vicgalle/Configurable-Yi-1.5-9B-Chat", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/Configurable-Yi-1.5-9B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43234506664538974 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5452196737175008 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20468277945619334 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34312080536912754 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42711458333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4015126329787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.829 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_ConfigurableBeagle-11B/3fd95536-ec61-4470-9082-14a116d20e80.json 
b/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_ConfigurableBeagle-11B/3fd95536-ec61-4470-9082-14a116d20e80.json deleted file mode 100644 index 590a3c990510fa85b6edff069fd748f5292776cf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_ConfigurableBeagle-11B/3fd95536-ec61-4470-9082-14a116d20e80.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vicgalle_ConfigurableBeagle-11B/1762652580.587369", - "retrieved_timestamp": "1762652580.58737", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vicgalle/ConfigurableBeagle-11B", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/ConfigurableBeagle-11B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5834452585805663 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5286592318626696 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04305135951661632 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39530208333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33743351063829785 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_ConfigurableHermes-7B/176727e5-31dc-462a-8210-4735543c32f2.json b/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_ConfigurableHermes-7B/176727e5-31dc-462a-8210-4735543c32f2.json deleted file mode 100644 index 9bdb32b09f3d442523cdb40c21cbefa44a7aef31..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_ConfigurableHermes-7B/176727e5-31dc-462a-8210-4735543c32f2.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vicgalle_ConfigurableHermes-7B/1762652580.5875661", - "retrieved_timestamp": "1762652580.587567", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vicgalle/ConfigurableHermes-7B", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/ConfigurableHermes-7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5410798902467675 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4572969627830424 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04758308157099698 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27684563758389263 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4056875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3025265957446808 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_ConfigurableSOLAR-10.7B/2dec3c49-01f0-4940-aa45-e7a6b2648e8f.json b/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_ConfigurableSOLAR-10.7B/2dec3c49-01f0-4940-aa45-e7a6b2648e8f.json deleted file mode 100644 index 5218ff68aec6cd8c3964c0287d9cd978b77f0736..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_ConfigurableSOLAR-10.7B/2dec3c49-01f0-4940-aa45-e7a6b2648e8f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vicgalle_ConfigurableSOLAR-10.7B/1762652580.587757", - "retrieved_timestamp": "1762652580.587758", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vicgalle/ConfigurableSOLAR-10.7B", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/ConfigurableSOLAR-10.7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - 
"evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5099558061499045 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48668100977360457 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06646525679758308 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38047916666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31732047872340424 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_Merge-Mixtral-Prometheus-8x7B/e6a0cf8f-323d-40c0-90c2-0e2071321df0.json b/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_Merge-Mixtral-Prometheus-8x7B/e6a0cf8f-323d-40c0-90c2-0e2071321df0.json deleted file mode 100644 index 745cdc5ebe1ca755169723cb7e3fe461c52cec65..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vicgalle/vicgalle_Merge-Mixtral-Prometheus-8x7B/e6a0cf8f-323d-40c0-90c2-0e2071321df0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vicgalle_Merge-Mixtral-Prometheus-8x7B/1762652580.588394", - "retrieved_timestamp": "1762652580.588395", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vicgalle/Merge-Mixtral-Prometheus-8x7B", - "developer": "vicgalle", - "inference_platform": "unknown", - "id": "vicgalle/Merge-Mixtral-Prometheus-8x7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5744025851407598 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5351498071096573 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09290030211480363 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3087248322147651 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40975 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3683510638297872 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vihangd/vihangd_smart-dan-sft-v0.1/00de0fac-e1a7-449a-969d-624cbe9adab1.json b/leaderboard_data/HFOpenLLMv2/vihangd/vihangd_smart-dan-sft-v0.1/00de0fac-e1a7-449a-969d-624cbe9adab1.json deleted file mode 100644 index 7d21f05124a8f44fba424f78fdb6a54c832d3fb3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vihangd/vihangd_smart-dan-sft-v0.1/00de0fac-e1a7-449a-969d-624cbe9adab1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vihangd_smart-dan-sft-v0.1/1762652580.589078", - "retrieved_timestamp": "1762652580.5890791", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vihangd/smart-dan-sft-v0.1", - "developer": "vihangd", - "inference_platform": "unknown", - "id": "vihangd/smart-dan-sft-v0.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15764615664215392 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30617689187138886 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.009818731117824773 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2550335570469799 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35018750000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11419547872340426 - } - } - ], - "additional_details": { - "precision": "4bit", - "architecture": "LlamaForCausalLM", - "params_billions": 0.379 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/voidful/voidful_smol-360m-ft/b93d3a57-2535-4150-a2db-71a50569e6ae.json b/leaderboard_data/HFOpenLLMv2/voidful/voidful_smol-360m-ft/b93d3a57-2535-4150-a2db-71a50569e6ae.json deleted file mode 100644 index a7837edc0a97bb0189bdc54240dd079881bf87bf..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/voidful/voidful_smol-360m-ft/b93d3a57-2535-4150-a2db-71a50569e6ae.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/voidful_smol-360m-ft/1762652580.589319", - "retrieved_timestamp": "1762652580.58932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "voidful/smol-360m-ft", - "developer": "voidful", - "inference_platform": "unknown", - "id": "voidful/smol-360m-ft" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2013103011121602 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3011946898842932 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.008308157099697885 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24580536912751677 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713645833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10871010638297872 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_MobileLLM-125M-HF/2e06f258-9d91-4734-aacc-f417fddad77c.json 
b/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_MobileLLM-125M-HF/2e06f258-9d91-4734-aacc-f417fddad77c.json deleted file mode 100644 index 73f58fc349711d30ed91123cc0faf0a99e5b52d4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_MobileLLM-125M-HF/2e06f258-9d91-4734-aacc-f417fddad77c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vonjack_MobileLLM-125M-HF/1762652580.589566", - "retrieved_timestamp": "1762652580.589567", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vonjack/MobileLLM-125M-HF", - "developer": "vonjack", - "inference_platform": "unknown", - "id": "vonjack/MobileLLM-125M-HF" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.21072753627042912 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30272988561565645 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.00906344410876133 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37818749999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1163563829787234 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.125 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_Phi-3-mini-4k-instruct-LLaMAfied/be3635bb-83de-4cbf-8e0f-3a84ee78bd67.json b/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_Phi-3-mini-4k-instruct-LLaMAfied/be3635bb-83de-4cbf-8e0f-3a84ee78bd67.json deleted file mode 100644 index 84502c8b1884bb2134d26b6ba317d0b6e8333dbe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_Phi-3-mini-4k-instruct-LLaMAfied/be3635bb-83de-4cbf-8e0f-3a84ee78bd67.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vonjack_Phi-3-mini-4k-instruct-LLaMAfied/1762652580.589802", - "retrieved_timestamp": "1762652580.589803", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vonjack/Phi-3-mini-4k-instruct-LLaMAfied", - "developer": "vonjack", - "inference_platform": "unknown", - "id": "vonjack/Phi-3-mini-4k-instruct-LLaMAfied" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5787488308798432 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5740684031598843 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.13821752265861026 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33053691275167785 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3923541666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3885472074468085 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.821 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_Phi-3.5-mini-instruct-hermes-fc-json/19cd2513-03e8-4d78-b222-566fe3928d2b.json b/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_Phi-3.5-mini-instruct-hermes-fc-json/19cd2513-03e8-4d78-b222-566fe3928d2b.json deleted file mode 100644 index e704adc1652b494b8eed71fff5d3bfcea107f4ac..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_Phi-3.5-mini-instruct-hermes-fc-json/19cd2513-03e8-4d78-b222-566fe3928d2b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vonjack_Phi-3.5-mini-instruct-hermes-fc-json/1762652580.5900009", - "retrieved_timestamp": "1762652580.5900018", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vonjack/Phi-3.5-mini-instruct-hermes-fc-json", - "developer": "vonjack", - "inference_platform": "unknown", - "id": "vonjack/Phi-3.5-mini-instruct-hermes-fc-json" - }, - 
"evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14158432957885078 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29747555432824196 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0075528700906344415 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25419463087248323 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40413541666666664 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11386303191489362 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "?", - "params_billions": 4.132 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_SmolLM2-1.7B-Merged/97bab408-a5f5-4363-b530-dc4a6966c859.json b/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_SmolLM2-1.7B-Merged/97bab408-a5f5-4363-b530-dc4a6966c859.json deleted file mode 100644 index 5e2aac400631c8b1649a4be2c1c8a29e8507b396..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_SmolLM2-1.7B-Merged/97bab408-a5f5-4363-b530-dc4a6966c859.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vonjack_SmolLM2-1.7B-Merged/1762652580.5904331", - "retrieved_timestamp": "1762652580.590434", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vonjack/SmolLM2-1.7B-Merged", - "developer": "vonjack", - "inference_platform": "unknown", - "id": "vonjack/SmolLM2-1.7B-Merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36979658417443495 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3586553457965105 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - 
"evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06268882175226587 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27936241610738255 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34079166666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2047872340425532 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 1.711 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_SmolLM2-135M-Merged/2c1cab05-b63f-49ca-a709-b5a4e859ba82.json b/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_SmolLM2-135M-Merged/2c1cab05-b63f-49ca-a709-b5a4e859ba82.json deleted file mode 100644 index 7e7da2356f6b5578b7f361fbda6c94302fd6b418..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_SmolLM2-135M-Merged/2c1cab05-b63f-49ca-a709-b5a4e859ba82.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vonjack_SmolLM2-135M-Merged/1762652580.590627", - "retrieved_timestamp": "1762652580.590627", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vonjack/SmolLM2-135M-Merged", - "developer": "vonjack", - "inference_platform": "unknown", - "id": "vonjack/SmolLM2-135M-Merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24829674153468353 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3099931265410582 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.011329305135951661 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23825503355704697 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36618749999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11120345744680851 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.135 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_SmolLM2-360M-Merged/f1980c69-8c24-4fcd-ace1-797195026c7b.json b/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_SmolLM2-360M-Merged/f1980c69-8c24-4fcd-ace1-797195026c7b.json deleted file mode 100644 index a23754a9221e35904c1787af9b3430c4e110c4ef..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/vonjack/vonjack_SmolLM2-360M-Merged/f1980c69-8c24-4fcd-ace1-797195026c7b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/vonjack_SmolLM2-360M-Merged/1762652580.590822", - "retrieved_timestamp": "1762652580.590823", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "vonjack/SmolLM2-360M-Merged", - "developer": "vonjack", - "inference_platform": "unknown", - "id": "vonjack/SmolLM2-360M-Merged" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.32058715319795916 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31548533684955926 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.017371601208459216 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2558724832214765 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3527291666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10979055851063829 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 0.362 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/w4r10ck/w4r10ck_SOLAR-10.7B-Instruct-v1.0-uncensored/9add85f6-b577-449e-8a2f-ae77a2588bc7.json b/leaderboard_data/HFOpenLLMv2/w4r10ck/w4r10ck_SOLAR-10.7B-Instruct-v1.0-uncensored/9add85f6-b577-449e-8a2f-ae77a2588bc7.json deleted file mode 100644 index 1cba78f1f5a2ed10de8e0e133fd230334b1541b4..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/w4r10ck/w4r10ck_SOLAR-10.7B-Instruct-v1.0-uncensored/9add85f6-b577-449e-8a2f-ae77a2588bc7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/w4r10ck_SOLAR-10.7B-Instruct-v1.0-uncensored/1762652580.5912771", - "retrieved_timestamp": "1762652580.591278", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored", - "developer": "w4r10ck", - "inference_platform": "unknown", - "id": "w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38840609582574237 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5301525050503222 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4639479166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3343583776595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 10.732 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/wanlige/wanlige_li-14b-v0.4-slerp/d2451e41-e4b0-4945-9ace-1b046b11528b.json b/leaderboard_data/HFOpenLLMv2/wanlige/wanlige_li-14b-v0.4-slerp/d2451e41-e4b0-4945-9ace-1b046b11528b.json deleted file mode 100644 index 343f68a7a486a53191ea985b64711abae3e7d7a1..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/wanlige/wanlige_li-14b-v0.4-slerp/d2451e41-e4b0-4945-9ace-1b046b11528b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - 
"evaluation_id": "hfopenllm_v2/wanlige_li-14b-v0.4-slerp/1762652580.591778", - "retrieved_timestamp": "1762652580.591778", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "wanlige/li-14b-v0.4-slerp", - "developer": "wanlige", - "inference_platform": "unknown", - "id": "wanlige/li-14b-v0.4-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4605967721201967 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6587180444175935 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41918429003021146 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4001677852348993 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47675 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5372340425531915 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/wanlige/wanlige_li-14b-v0.4-slerp0.1/54a93ff0-bff3-4252-ba4a-e99f06b46896.json b/leaderboard_data/HFOpenLLMv2/wanlige/wanlige_li-14b-v0.4-slerp0.1/54a93ff0-bff3-4252-ba4a-e99f06b46896.json deleted file mode 100644 index 6b55f55622078f2be0264f195d412b708cef02af..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/wanlige/wanlige_li-14b-v0.4-slerp0.1/54a93ff0-bff3-4252-ba4a-e99f06b46896.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/wanlige_li-14b-v0.4-slerp0.1/1762652580.5919738", - "retrieved_timestamp": "1762652580.591975", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "wanlige/li-14b-v0.4-slerp0.1", - "developer": "wanlige", - "inference_platform": "unknown", - "id": "wanlige/li-14b-v0.4-slerp0.1" - }, 
- "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7922722819895655 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6571741435852609 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5332326283987915 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35906040268456374 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4206666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5294215425531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/wanlige/wanlige_li-14b-v0.4/8965f266-28f1-43f2-b03c-acc4a9478b7c.json b/leaderboard_data/HFOpenLLMv2/wanlige/wanlige_li-14b-v0.4/8965f266-28f1-43f2-b03c-acc4a9478b7c.json deleted file mode 100644 index 2cdd6fb8e39398837ac232a9ad11856b86543cd3..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/wanlige/wanlige_li-14b-v0.4/8965f266-28f1-43f2-b03c-acc4a9478b7c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/wanlige_li-14b-v0.4/1762652580.591545", - "retrieved_timestamp": "1762652580.591546", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "wanlige/li-14b-v0.4", - "developer": "wanlige", - "inference_platform": "unknown", - "id": "wanlige/li-14b-v0.4" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.813279875175645 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6544457993364277 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5574018126888217 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3389261744966443 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.446 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5167054521276596 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.77 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/wannaphong/wannaphong_KhanomTanLLM-Instruct/681b02e4-7b57-42b7-9550-59c664511b01.json b/leaderboard_data/HFOpenLLMv2/wannaphong/wannaphong_KhanomTanLLM-Instruct/681b02e4-7b57-42b7-9550-59c664511b01.json deleted file mode 100644 index 37ee114add7def8bb2ed658945a433dde22cfe8f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/wannaphong/wannaphong_KhanomTanLLM-Instruct/681b02e4-7b57-42b7-9550-59c664511b01.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/wannaphong_KhanomTanLLM-Instruct/1762652580.59218", - "retrieved_timestamp": "1762652580.59218", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "wannaphong/KhanomTanLLM-Instruct", - "developer": "wannaphong", - "inference_platform": "unknown", - "id": "wannaphong/KhanomTanLLM-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.16211762567764643 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30931233392513263 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.013595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634228187919463 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, 
- "max_score": 1 - }, - "score_details": { - "score": 0.37006249999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1118683510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.447 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/waqasali1707/waqasali1707_Beast-Soul-new/c04bef75-d3cc-463e-ac24-a2b18d3611af.json b/leaderboard_data/HFOpenLLMv2/waqasali1707/waqasali1707_Beast-Soul-new/c04bef75-d3cc-463e-ac24-a2b18d3611af.json deleted file mode 100644 index 06086db5d94dfa768655bdba32f3ff43491f6885..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/waqasali1707/waqasali1707_Beast-Soul-new/c04bef75-d3cc-463e-ac24-a2b18d3611af.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/waqasali1707_Beast-Soul-new/1762652580.592428", - "retrieved_timestamp": "1762652580.592428", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "waqasali1707/Beast-Soul-new", - "developer": "waqasali1707", - "inference_platform": "unknown", - "id": "waqasali1707/Beast-Soul-new" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5029865202108184 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.522494907014536 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0702416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4485625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3107546542553192 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/weathermanj/weathermanj_Menda-3B-500/468d60fa-5c01-41bd-a791-e0e86c2d02bf.json b/leaderboard_data/HFOpenLLMv2/weathermanj/weathermanj_Menda-3B-500/468d60fa-5c01-41bd-a791-e0e86c2d02bf.json deleted file mode 100644 index 90a4be7c5b5a1a8c81d757342a2528aa05498081..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/weathermanj/weathermanj_Menda-3B-500/468d60fa-5c01-41bd-a791-e0e86c2d02bf.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3B-500/1762652580.593058", - "retrieved_timestamp": "1762652580.593059", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "weathermanj/Menda-3B-500", - "developer": "weathermanj", - "inference_platform": "unknown", - "id": "weathermanj/Menda-3B-500" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6353021095138676 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4766312519942703 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3723564954682779 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39679166666666665 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3474900265957447 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/weathermanj/weathermanj_Menda-3b-750/9f1f8a2e-3a63-4b8e-85e9-141477fddcc3.json b/leaderboard_data/HFOpenLLMv2/weathermanj/weathermanj_Menda-3b-750/9f1f8a2e-3a63-4b8e-85e9-141477fddcc3.json deleted file mode 100644 index 1f4e2653eacb55f8ee6a55719a80430dfcc0adb2..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/weathermanj/weathermanj_Menda-3b-750/9f1f8a2e-3a63-4b8e-85e9-141477fddcc3.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3b-750/1762652580.593308", - "retrieved_timestamp": 
"1762652580.593309", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "weathermanj/Menda-3b-750", - "developer": "weathermanj", - "inference_platform": "unknown", - "id": "weathermanj/Menda-3b-750" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6335035483627884 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4736825577251204 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39418749999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3505651595744681 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/weathermanj/weathermanj_Menda-3b-Optim-100/e33fb04e-ac99-423f-ac8c-5268e527bf34.json b/leaderboard_data/HFOpenLLMv2/weathermanj/weathermanj_Menda-3b-Optim-100/e33fb04e-ac99-423f-ac8c-5268e527bf34.json deleted file mode 100644 index 42ef002193ec461e14d8dad619e472fc63dc479d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/weathermanj/weathermanj_Menda-3b-Optim-100/e33fb04e-ac99-423f-ac8c-5268e527bf34.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3b-Optim-100/1762652580.5935092", - "retrieved_timestamp": "1762652580.59351", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "weathermanj/Menda-3b-Optim-100", - "developer": "weathermanj", - "inference_platform": "unknown", - "id": "weathermanj/Menda-3b-Optim-100" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6398234462337709 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47348022177793836 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3716012084592145 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39930208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3460771276595745 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/weathermanj/weathermanj_Menda-3b-Optim-200/b8b84752-c112-47be-8a86-35ca0e578301.json b/leaderboard_data/HFOpenLLMv2/weathermanj/weathermanj_Menda-3b-Optim-200/b8b84752-c112-47be-8a86-35ca0e578301.json deleted file mode 100644 index 70ee261656040b2df47daca53139ac1378bfbd5b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/weathermanj/weathermanj_Menda-3b-Optim-200/b8b84752-c112-47be-8a86-35ca0e578301.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/weathermanj_Menda-3b-Optim-200/1762652580.5937102", - "retrieved_timestamp": "1762652580.5937111", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "weathermanj/Menda-3b-Optim-200", - "developer": "weathermanj", - "inference_platform": "unknown", - "id": "weathermanj/Menda-3b-Optim-200" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6374752323834094 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.47460604908284837 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact 
Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3731117824773414 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2827181208053691 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40330208333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3484042553191489 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/win10/win10_ArliAI-RPMax-v1.3-merge-13.3B/16777b0f-3063-45eb-be07-294d13f975ac.json b/leaderboard_data/HFOpenLLMv2/win10/win10_ArliAI-RPMax-v1.3-merge-13.3B/16777b0f-3063-45eb-be07-294d13f975ac.json deleted file mode 100644 index ab7ac095c268f769f95a1912930cfcb7a1ae19f0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/win10/win10_ArliAI-RPMax-v1.3-merge-13.3B/16777b0f-3063-45eb-be07-294d13f975ac.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/win10_ArliAI-RPMax-v1.3-merge-13.3B/1762652580.593927", - "retrieved_timestamp": "1762652580.5939279", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "win10/ArliAI-RPMax-v1.3-merge-13.3B", - "developer": "win10", - "inference_platform": "unknown", - "id": "win10/ArliAI-RPMax-v1.3-merge-13.3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3038260703821416 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4581388671914119 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - 
"score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4325104166666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31998005319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.265 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/win10/win10_Breeze-13B-32k-Instruct-v1_0/bc990db1-c6d9-4113-9946-466bfd5cf9cc.json b/leaderboard_data/HFOpenLLMv2/win10/win10_Breeze-13B-32k-Instruct-v1_0/bc990db1-c6d9-4113-9946-466bfd5cf9cc.json deleted file mode 100644 index 17f28250d195af1710747788da45022934594b86..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/win10/win10_Breeze-13B-32k-Instruct-v1_0/bc990db1-c6d9-4113-9946-466bfd5cf9cc.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/win10_Breeze-13B-32k-Instruct-v1_0/1762652580.5941818", - "retrieved_timestamp": "1762652580.594183", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "win10/Breeze-13B-32k-Instruct-v1_0", - "developer": "win10", - "inference_platform": "unknown", - "id": "win10/Breeze-13B-32k-Instruct-v1_0" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35843118481185476 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.46112304746712934 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.01283987915407855 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26426174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42019791666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2568151595744681 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MistralForCausalLM", - "params_billions": 12.726 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/win10/win10_Llama-3.2-3B-Instruct-24-9-29/bf253a63-4685-4e51-8a0d-5209306926c8.json b/leaderboard_data/HFOpenLLMv2/win10/win10_Llama-3.2-3B-Instruct-24-9-29/bf253a63-4685-4e51-8a0d-5209306926c8.json deleted file mode 100644 index db209ccd134e880ef0d444d0c793ec8dcbbd7980..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/win10/win10_Llama-3.2-3B-Instruct-24-9-29/bf253a63-4685-4e51-8a0d-5209306926c8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/win10_Llama-3.2-3B-Instruct-24-9-29/1762652580.594629", - "retrieved_timestamp": "1762652580.59463", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "win10/Llama-3.2-3B-Instruct-24-9-29", - "developer": "win10", - "inference_platform": "unknown", - "id": "win10/Llama-3.2-3B-Instruct-24-9-29" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7332211864519476 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4614234982167829 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.17069486404833836 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.27432885906040266 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35552083333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3228058510638298 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/win10/win10_Qwen2.5-2B-Instruct/143dc973-1063-45d6-9747-9f24a9ae5657.json b/leaderboard_data/HFOpenLLMv2/win10/win10_Qwen2.5-2B-Instruct/143dc973-1063-45d6-9747-9f24a9ae5657.json deleted file mode 100644 index 40e21499f94f3b25c34d0f25e2d342d791a9f746..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/win10/win10_Qwen2.5-2B-Instruct/143dc973-1063-45d6-9747-9f24a9ae5657.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/win10_Qwen2.5-2B-Instruct/1762652580.5952861", - 
"retrieved_timestamp": "1762652580.595287", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "win10/Qwen2.5-2B-Instruct", - "developer": "win10", - "inference_platform": "unknown", - "id": "win10/Qwen2.5-2B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22728914834860392 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3705905854806977 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.022658610271903322 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2676174496644295 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43784375000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19340093085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 2.9 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/win10/win10_llama3-13.45b-Instruct/3c9eb291-6171-4d40-aa5f-58d39738fdcb.json b/leaderboard_data/HFOpenLLMv2/win10/win10_llama3-13.45b-Instruct/3c9eb291-6171-4d40-aa5f-58d39738fdcb.json deleted file mode 100644 index b09167c331659f56c91b90e46f52a3ae3dc715cc..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/win10/win10_llama3-13.45b-Instruct/3c9eb291-6171-4d40-aa5f-58d39738fdcb.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/win10_llama3-13.45b-Instruct/1762652580.595499", - "retrieved_timestamp": "1762652580.5955", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "win10/llama3-13.45b-Instruct", - "developer": "win10", - "inference_platform": "unknown", - "id": "win10/llama3-13.45b-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - 
"metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4144348107465968 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.486541523346714 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.02416918429003021 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38476041666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3345246010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 13.265 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/win10/win10_miscii-14b-1M-0128/c19f2ddd-7710-4844-9f1f-c0cd1c7e3e41.json b/leaderboard_data/HFOpenLLMv2/win10/win10_miscii-14b-1M-0128/c19f2ddd-7710-4844-9f1f-c0cd1c7e3e41.json deleted file mode 100644 index cee77305facdfe244d4846425c32be4f262268fb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/win10/win10_miscii-14b-1M-0128/c19f2ddd-7710-4844-9f1f-c0cd1c7e3e41.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/win10_miscii-14b-1M-0128/1762652580.5956988", - "retrieved_timestamp": "1762652580.5957", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "win10/miscii-14b-1M-0128", - "developer": "win10", - "inference_platform": "unknown", - "id": "win10/miscii-14b-1M-0128" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4180818007331658 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5741994518517665 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4773413897280967 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3825503355704698 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5431041666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.44913563829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 14.766 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xMaulana/xMaulana_FinMatcha-3B-Instruct/105021c8-c214-4a6a-ac3b-747c4c48886e.json b/leaderboard_data/HFOpenLLMv2/xMaulana/xMaulana_FinMatcha-3B-Instruct/105021c8-c214-4a6a-ac3b-747c4c48886e.json deleted file mode 100644 index 0f6f4f189f0cf5e9b1922931a00910ce2b3c3c06..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xMaulana/xMaulana_FinMatcha-3B-Instruct/105021c8-c214-4a6a-ac3b-747c4c48886e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xMaulana_FinMatcha-3B-Instruct/1762652580.5969138", - "retrieved_timestamp": "1762652580.5969138", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xMaulana/FinMatcha-3B-Instruct", - "developer": "xMaulana", - "inference_platform": "unknown", - "id": "xMaulana/FinMatcha-3B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7548283000217202 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.453555265188897 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14350453172205438 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26929530201342283 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.36333333333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3181515957446808 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 3.213 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xinchen9/xinchen9_llama3-b8-ft-dis/5ea3a084-bc30-4390-97a2-1933c5422790.json b/leaderboard_data/HFOpenLLMv2/xinchen9/xinchen9_llama3-b8-ft-dis/5ea3a084-bc30-4390-97a2-1933c5422790.json deleted file mode 100644 index f897275d8f284f44d55d9ca73ca9b6d3792fcb7b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xinchen9/xinchen9_llama3-b8-ft-dis/5ea3a084-bc30-4390-97a2-1933c5422790.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xinchen9_llama3-b8-ft-dis/1762652580.598142", - "retrieved_timestamp": "1762652580.598142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xinchen9/llama3-b8-ft-dis", - "developer": "xinchen9", - "inference_platform": "unknown", - "id": "xinchen9/llama3-b8-ft-dis" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.154598687039278 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4625789691224553 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.03927492447129909 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.31291946308724833 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.365375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3243849734042553 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/a9888e61-bd14-4769-b620-cda908c8ba3e.json 
b/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/a9888e61-bd14-4769-b620-cda908c8ba3e.json deleted file mode 100644 index c2f0c0c0c0713bf2288b75fa60dc8ce62fb46e6b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/a9888e61-bd14-4769-b620-cda908c8ba3e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table/1762652580.598392", - "retrieved_timestamp": "1762652580.5983932", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table", - "developer": "xkp24", - "inference_platform": "unknown", - "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6374752323834094 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4912273915261041 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09214501510574018 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38199999999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3686003989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/99d6ac02-a8f8-409f-ad9d-ce5fd7ed6fe0.json b/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/99d6ac02-a8f8-409f-ad9d-ce5fd7ed6fe0.json deleted file mode 100644 index 61b74758d6850707e34c000702fce7cc7e671d21..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/99d6ac02-a8f8-409f-ad9d-ce5fd7ed6fe0.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table/1762652580.598656", - "retrieved_timestamp": "1762652580.598656", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table", - "developer": "xkp24", - "inference_platform": "unknown", - "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7274509412802475 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5056858683165713 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08459214501510574 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38190624999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3696808510638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/71a54215-e97a-4ee6-928c-344bd690b020.json b/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/71a54215-e97a-4ee6-928c-344bd690b020.json deleted file mode 100644 index 34e0c6ccd96341489b07521ecd0b987e2f0a21f9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/71a54215-e97a-4ee6-928c-344bd690b020.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table/1762652580.598878", - "retrieved_timestamp": "1762652580.5988789", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table", - "developer": "xkp24", - "inference_platform": "unknown", - "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6568593553992297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49518319163897667 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35939583333333336 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37017952127659576 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/2fe15418-16bc-4f60-bad2-7329a3670507.json b/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/2fe15418-16bc-4f60-bad2-7329a3670507.json deleted file mode 100644 index 0fb4745cac191190a60b5147d4800a704c7e731f..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/2fe15418-16bc-4f60-bad2-7329a3670507.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table/1762652580.599085", - "retrieved_timestamp": "1762652580.599086", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table", - "developer": "xkp24", - "inference_platform": "unknown", - "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6620799478716473 - } - }, - { - "evaluation_name": "BBH", - 
"metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.500449109241973 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08610271903323263 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3805416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3599567819148936 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/f6bcff0a-559b-44c1-9c70-259446b3ebe5.json b/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/f6bcff0a-559b-44c1-9c70-259446b3ebe5.json deleted file mode 100644 index ffb14c17b7680ed51efeac5b1c3525cd9ccaa0fa..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/f6bcff0a-559b-44c1-9c70-259446b3ebe5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001/1762652580.599285", - "retrieved_timestamp": "1762652580.599286", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001", - "developer": "xkp24", - "inference_platform": "unknown", - "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6042278931014153 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4936062924421171 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 
1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3793333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.370844414893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/4deeeff7-f62d-4c42-b32a-98bdd773a758.json b/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/4deeeff7-f62d-4c42-b32a-98bdd773a758.json deleted file mode 100644 index 5b9ccfcdfac77c07efda38bdef6e2c1a913408a8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/4deeeff7-f62d-4c42-b32a-98bdd773a758.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002/1762652580.599496", - "retrieved_timestamp": "1762652580.5994968", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002", - "developer": "xkp24", - "inference_platform": "unknown", - "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7131876753680235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4996376240562969 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08534743202416918 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3872083333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3664394946808511 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/8ec55b3f-e425-4ee9-98d5-dac775977514.json b/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/8ec55b3f-e425-4ee9-98d5-dac775977514.json deleted file mode 100644 index 510c5668e1b83c8e10c9edfa83ad34db63e2d659..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/8ec55b3f-e425-4ee9-98d5-dac775977514.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001/1762652580.599715", - "retrieved_timestamp": "1762652580.599715", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001", - "developer": "xkp24", - "inference_platform": "unknown", - "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.594710922574325 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48992211803775065 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10725075528700906 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35809374999999993 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37042885638297873 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/c583cff2-2944-4afb-b32e-c0f49bc0d3b7.json b/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/c583cff2-2944-4afb-b32e-c0f49bc0d3b7.json deleted file mode 100644 index 008e02ea3f4dfc089563cd69e497c843937e86ff..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xkp24/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/c583cff2-2944-4afb-b32e-c0f49bc0d3b7.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xkp24_Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002/1762652580.599936", - "retrieved_timestamp": "1762652580.599936", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002", - "developer": "xkp24", - "inference_platform": "unknown", - "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6453188650558297 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4951075713814987 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.393875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3529753989361702 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/a6996896-1464-4b55-a784-28deb06150c8.json b/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/a6996896-1464-4b55-a784-28deb06150c8.json deleted file mode 100644 
index cddf6b1437e95a4b561668ca6a21d9517a29872b..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/a6996896-1464-4b55-a784-28deb06150c8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table/1762652580.600162", - "retrieved_timestamp": "1762652580.600162", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table", - "developer": "xukp20", - "inference_platform": "unknown", - "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.575601625908146 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4901206199104098 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09969788519637462 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36596874999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36585771276595747 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/406f36fc-1243-4342-80c6-95b96fcc003f.json b/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/406f36fc-1243-4342-80c6-95b96fcc003f.json deleted file mode 100644 index d4b99303ce3cfb8550f4c4edfd9691b33ac6de69..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/406f36fc-1243-4342-80c6-95b96fcc003f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table/1762652580.600485", - "retrieved_timestamp": "1762652580.6004858", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table", - "developer": "xukp20", - "inference_platform": "unknown", - "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7034457461757027 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5091868512191421 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09667673716012085 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37390624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3692652925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/87bcbd57-2d0e-4d77-9f1e-3ec0199c8452.json b/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/87bcbd57-2d0e-4d77-9f1e-3ec0199c8452.json deleted file mode 100644 index c3f6ae0e66e382ccc490054fcf641aefc4dd1e3c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/87bcbd57-2d0e-4d77-9f1e-3ec0199c8452.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table/1762652580.6007009", - "retrieved_timestamp": "1762652580.6007009", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table", - "developer": "xukp20", - "inference_platform": "unknown", - "id": 
"xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6023794642659255 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49695315361511977 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36736458333333327 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3657746010638298 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/d7125235-7b17-4a90-9125-c993646cd7c8.json b/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/d7125235-7b17-4a90-9125-c993646cd7c8.json deleted file mode 100644 index f4aa246b655356f8c2e5ee0bcaba5a9b6ac56d86..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/d7125235-7b17-4a90-9125-c993646cd7c8.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table/1762652580.600907", - "retrieved_timestamp": "1762652580.600908", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table", - "developer": "xukp20", - "inference_platform": "unknown", - "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6620300801872365 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49999369392208165 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38181249999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3614527925531915 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/d758e9a9-c316-4de5-bdb7-d0ec7401fa12.json b/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/d758e9a9-c316-4de5-bdb7-d0ec7401fa12.json deleted file mode 100644 index b9e6a6c60c882f2d461050268fe5ff3195505c90..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/d758e9a9-c316-4de5-bdb7-d0ec7401fa12.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001/1762652580.601125", - "retrieved_timestamp": "1762652580.601126", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001", - "developer": "xukp20", - "inference_platform": "unknown", - "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5336363072203975 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49148727192613517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09818731117824774 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37796874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3624501329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/d1445003-91ea-4b2b-ab38-a47a6392620e.json b/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/d1445003-91ea-4b2b-ab38-a47a6392620e.json deleted file mode 100644 index e685fd11ccf06b48279a1d84a9977812ef489dad..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/d1445003-91ea-4b2b-ab38-a47a6392620e.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002/1762652580.601484", - "retrieved_timestamp": "1762652580.6014872", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002", - "developer": "xukp20", - "inference_platform": "unknown", - "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6851609285584471 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.507516320435292 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07175226586102719 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25838926174496646 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - 
"max_score": 1 - }, - "score_details": { - "score": 0.3831770833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3621176861702128 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/4d9c2e04-caef-43f5-9ce1-40517341ff40.json b/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/4d9c2e04-caef-43f5-9ce1-40517341ff40.json deleted file mode 100644 index ef6a2116dc7549ec2686731629b6d6c4a9376b26..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/4d9c2e04-caef-43f5-9ce1-40517341ff40.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xukp20_Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001/1762652580.601857", - "retrieved_timestamp": "1762652580.6018581", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001", - "developer": "xukp20", - "inference_platform": "unknown", - "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5482242671666733 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.48871746894288526 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0891238670694864 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3632708333333334 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36710438829787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": 
"LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/5d53b35f-6bff-493c-805d-b45517ae0e2b.json b/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/5d53b35f-6bff-493c-805d-b45517ae0e2b.json deleted file mode 100644 index f8f807671e565d12d09e273c43d9d879c226233c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/xukp20/xukp20_llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/5d53b35f-6bff-493c-805d-b45517ae0e2b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xukp20_llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table/1762652580.602122", - "retrieved_timestamp": "1762652580.602124", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table", - "developer": "xukp20", - "inference_platform": "unknown", - "id": "xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6900069593124022 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4978456981516493 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10498489425981873 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3673333333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37159242021276595 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/xwen-team/xwen-team_Xwen-7B-Chat/a099778d-4c47-472e-872d-8fffcdf2764f.json b/leaderboard_data/HFOpenLLMv2/xwen-team/xwen-team_Xwen-7B-Chat/a099778d-4c47-472e-872d-8fffcdf2764f.json deleted file mode 100644 index b640403382837841cb751788f101094fc943008a..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/xwen-team/xwen-team_Xwen-7B-Chat/a099778d-4c47-472e-872d-8fffcdf2764f.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/xwen-team_Xwen-7B-Chat/1762652580.602432", - "retrieved_timestamp": "1762652580.602433", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "xwen-team/Xwen-7B-Chat", - "developer": "xwen-team", - "inference_platform": "unknown", - "id": "xwen-team/Xwen-7B-Chat" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6864098370102439 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.506762793166296 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4509063444108761 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2609060402684564 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3914270833333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42902260638297873 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 7.616 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/yam-peleg/yam-peleg_Hebrew-Gemma-11B-Instruct/5d25872d-eacd-4e5c-b9cc-9ee014147730.json b/leaderboard_data/HFOpenLLMv2/yam-peleg/yam-peleg_Hebrew-Gemma-11B-Instruct/5d25872d-eacd-4e5c-b9cc-9ee014147730.json deleted file mode 100644 index 471d7c78f823f0bc4dbde3cd9e066be46ffe8bd9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/yam-peleg/yam-peleg_Hebrew-Gemma-11B-Instruct/5d25872d-eacd-4e5c-b9cc-9ee014147730.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yam-peleg_Hebrew-Gemma-11B-Instruct/1762652580.603103", - "retrieved_timestamp": "1762652580.603105", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - 
"evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yam-peleg/Hebrew-Gemma-11B-Instruct", - "developer": "yam-peleg", - "inference_platform": "unknown", - "id": "yam-peleg/Hebrew-Gemma-11B-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30207737691547315 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40357843109818686 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06570996978851963 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.276006711409396 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4088541666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25540226063829785 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "GemmaForCausalLM", - "params_billions": 10.475 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/yanng1242/yanng1242_Marcoro14-7B-slerp/f5005cc2-cec4-4a1c-be09-a670d996d15b.json b/leaderboard_data/HFOpenLLMv2/yanng1242/yanng1242_Marcoro14-7B-slerp/f5005cc2-cec4-4a1c-be09-a670d996d15b.json deleted file mode 100644 index b9b8a3442df34a14df9ecc54a573aa375ef19b42..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/yanng1242/yanng1242_Marcoro14-7B-slerp/f5005cc2-cec4-4a1c-be09-a670d996d15b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yanng1242_Marcoro14-7B-slerp/1762652580.604092", - "retrieved_timestamp": "1762652580.604092", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yanng1242/Marcoro14-7B-slerp", - "developer": "yanng1242", - "inference_platform": "unknown", - "id": "yanng1242/Marcoro14-7B-slerp" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4059916576904835 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy 
on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5251655292981787 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07477341389728097 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3145973154362416 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.468625 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3168218085106383 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MistralForCausalLM", - "params_billions": 7.242 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/yasserrmd/yasserrmd_Coder-GRPO-3B/425372c0-e096-4bdf-8f6c-eb2d5b36bb07.json b/leaderboard_data/HFOpenLLMv2/yasserrmd/yasserrmd_Coder-GRPO-3B/425372c0-e096-4bdf-8f6c-eb2d5b36bb07.json deleted file mode 100644 index 796c23299bc926dea6556fe510baecffb4f0dc41..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/yasserrmd/yasserrmd_Coder-GRPO-3B/425372c0-e096-4bdf-8f6c-eb2d5b36bb07.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yasserrmd_Coder-GRPO-3B/1762652580.6044621", - "retrieved_timestamp": "1762652580.604463", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yasserrmd/Coder-GRPO-3B", - "developer": "yasserrmd", - "inference_platform": "unknown", - "id": "yasserrmd/Coder-GRPO-3B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6207640172520024 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4469120364616385 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3202416918429003 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - 
"score_details": { - "score": 0.27768456375838924 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4114583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3197307180851064 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 3.086 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/yasserrmd/yasserrmd_Text2SQL-1.5B/42a767cf-7d29-486d-b83e-fcfa51f048c1.json b/leaderboard_data/HFOpenLLMv2/yasserrmd/yasserrmd_Text2SQL-1.5B/42a767cf-7d29-486d-b83e-fcfa51f048c1.json deleted file mode 100644 index cacc260f4a8c8276de14d46b58bce162e3485035..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/yasserrmd/yasserrmd_Text2SQL-1.5B/42a767cf-7d29-486d-b83e-fcfa51f048c1.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yasserrmd_Text2SQL-1.5B/1762652580.604796", - "retrieved_timestamp": "1762652580.6047978", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yasserrmd/Text2SQL-1.5B", - "developer": "yasserrmd", - "inference_platform": "unknown", - "id": "yasserrmd/Text2SQL-1.5B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2857407235025289 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38577157961565695 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06797583081570997 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.287751677852349 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.39423958333333337 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23628656914893617 - } - } - ], - "additional_details": { - "precision": 
"bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.544 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ycros/ycros_BagelMIsteryTour-v2-8x7B/2419f2a3-03df-4521-9baa-346e3cb53181.json b/leaderboard_data/HFOpenLLMv2/ycros/ycros_BagelMIsteryTour-v2-8x7B/2419f2a3-03df-4521-9baa-346e3cb53181.json deleted file mode 100644 index 3d96e595e43950bf761114e61a0d59f114bc99bb..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ycros/ycros_BagelMIsteryTour-v2-8x7B/2419f2a3-03df-4521-9baa-346e3cb53181.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ycros_BagelMIsteryTour-v2-8x7B/1762652580.605103", - "retrieved_timestamp": "1762652580.6051042", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ycros/BagelMIsteryTour-v2-8x7B", - "developer": "ycros", - "inference_platform": "unknown", - "id": "ycros/BagelMIsteryTour-v2-8x7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.599431730031871 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.515923595752544 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07854984894259819 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30453020134228187 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4202916666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.34732380319148937 - } - } - ], - "additional_details": { - "precision": "float16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ycros/ycros_BagelMIsteryTour-v2-8x7B/a88e7110-2a58-4f47-801f-2a49037eaed6.json b/leaderboard_data/HFOpenLLMv2/ycros/ycros_BagelMIsteryTour-v2-8x7B/a88e7110-2a58-4f47-801f-2a49037eaed6.json deleted file mode 100644 index d99a0fb4ce6486562c4108365ad3944aa3aac1fe..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ycros/ycros_BagelMIsteryTour-v2-8x7B/a88e7110-2a58-4f47-801f-2a49037eaed6.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - 
"schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ycros_BagelMIsteryTour-v2-8x7B/1762652580.605396", - "retrieved_timestamp": "1762652580.605397", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ycros/BagelMIsteryTour-v2-8x7B", - "developer": "ycros", - "inference_platform": "unknown", - "id": "ycros/BagelMIsteryTour-v2-8x7B" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6262095683896506 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5141943573573103 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30788590604026844 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.41375 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3480718085106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 46.703 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/cd2f94a5-595a-469e-b34e-a5f9abb82e6b.json b/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/cd2f94a5-595a-469e-b34e-a5f9abb82e6b.json deleted file mode 100644 index 57e5e98f2212e73c28c890aa8b01ab0720c67aee..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/cd2f94a5-595a-469e-b34e-a5f9abb82e6b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table/1762652580.605642", - "retrieved_timestamp": "1762652580.605643", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": 
"yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6708976626462231 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49866134349131935 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11178247734138973 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37269791666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37159242021276595 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/c19ed336-aadf-4af3-a0e5-1c1946a17ce4.json b/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/c19ed336-aadf-4af3-a0e5-1c1946a17ce4.json deleted file mode 100644 index b800a22aa9efdd964bc87bbbaf0dcaf0d3e733da..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/c19ed336-aadf-4af3-a0e5-1c1946a17ce4.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table/1762652580.605978", - "retrieved_timestamp": "1762652580.605979", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7332710541363582 - } - }, - { - "evaluation_name": "BBH", - "metric_config": 
{ - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5080359954971677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10347432024169184 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38060416666666663 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3748337765957447 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/d6cadac8-17a9-430f-94b3-6eb0c7ecc146.json b/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/d6cadac8-17a9-430f-94b3-6eb0c7ecc146.json deleted file mode 100644 index ba825826dc8839cfa7a416ff3ce4cf5d5acb925d..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/d6cadac8-17a9-430f-94b3-6eb0c7ecc146.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table/1762652580.60626", - "retrieved_timestamp": "1762652580.606261", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6784664689690023 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49412091896520455 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11253776435045318 - } - }, - { - "evaluation_name": 
"GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3646666666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37175864361702127 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/0bdeac20-0505-459e-b417-ea4cb2f95cec.json b/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/0bdeac20-0505-459e-b417-ea4cb2f95cec.json deleted file mode 100644 index e2737ae23f765ba667892181bd74039d22d4cb47..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/0bdeac20-0505-459e-b417-ea4cb2f95cec.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table/1762652580.6064892", - "retrieved_timestamp": "1762652580.6064901", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7131876753680235 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5025359954971677 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09894259818731117 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3713333333333333 - } - }, - { - 
"evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36826795212765956 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/b1ad6a57-8cad-4cca-8dd6-00ebd35089ab.json b/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/b1ad6a57-8cad-4cca-8dd6-00ebd35089ab.json deleted file mode 100644 index d3a3cd5322d36e3509b00f510237c47de88d302e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/b1ad6a57-8cad-4cca-8dd6-00ebd35089ab.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001/1762652580.606723", - "retrieved_timestamp": "1762652580.606724", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6495653754260917 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4979459532536201 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10120845921450151 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37796874999999996 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37200797872340424 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git 
a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/249af8cd-717b-4ee9-8ac7-740f16708675.json b/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/249af8cd-717b-4ee9-8ac7-740f16708675.json deleted file mode 100644 index 5098b588eec02ecf7a6d7a81223754107eb2b16c..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/249af8cd-717b-4ee9-8ac7-740f16708675.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002/1762652580.6069329", - "retrieved_timestamp": "1762652580.606934", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7196073086078272 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5045147424411157 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08761329305135952 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2600671140939597 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3831458333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3734208776595745 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/338737c7-29cf-44d8-be92-6749167b7c03.json b/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/338737c7-29cf-44d8-be92-6749167b7c03.json deleted file mode 100644 index 571a219f6c2b3774475f407dec489bafd86997f6..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/338737c7-29cf-44d8-be92-6749167b7c03.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001/1762652580.6072068", - "retrieved_timestamp": "1762652580.6072068", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6504397221594258 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49578758563187125 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09365558912386707 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36603125 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3702626329787234 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/aa12336f-556c-4222-a10c-529eb74a793b.json b/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/aa12336f-556c-4222-a10c-529eb74a793b.json deleted file mode 100644 index f53af50664cd095a4eb62ebed0db25ca3dc1fa38..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/yfzp/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/aa12336f-556c-4222-a10c-529eb74a793b.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yfzp_Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002/1762652580.607418", - "retrieved_timestamp": "1762652580.6074188", - "source_data": [ - 
"https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002", - "developer": "yfzp", - "inference_platform": "unknown", - "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7015973173402128 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4991547169583548 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08685800604229607 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25922818791946306 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.37790624999999994 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.366938164893617 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/yifAI/yifAI_Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/79fad1b7-c458-4f89-9d7a-d58f70ba6c90.json b/leaderboard_data/HFOpenLLMv2/yifAI/yifAI_Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/79fad1b7-c458-4f89-9d7a-d58f70ba6c90.json deleted file mode 100644 index c8e6a260ac435929ea15155383433d2b6081f771..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/yifAI/yifAI_Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/79fad1b7-c458-4f89-9d7a-d58f70ba6c90.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yifAI_Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002/1762652580.6077929", - "retrieved_timestamp": "1762652580.607796", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002", - "developer": 
"yifAI", - "inference_platform": "unknown", - "id": "yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6489658550423987 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.49145217071254876 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0755287009063444 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26174496644295303 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38987499999999997 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3519780585106383 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ylalain/ylalain_ECE-PRYMMAL-YL-1B-SLERP-V8/5e4e3c08-71cd-4241-bfe9-bc242f0cc32a.json b/leaderboard_data/HFOpenLLMv2/ylalain/ylalain_ECE-PRYMMAL-YL-1B-SLERP-V8/5e4e3c08-71cd-4241-bfe9-bc242f0cc32a.json deleted file mode 100644 index 88b695edb550e2019734fc449bd0a4679dfb0bf9..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ylalain/ylalain_ECE-PRYMMAL-YL-1B-SLERP-V8/5e4e3c08-71cd-4241-bfe9-bc242f0cc32a.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ylalain_ECE-PRYMMAL-YL-1B-SLERP-V8/1762652580.608171", - "retrieved_timestamp": "1762652580.608172", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8", - "developer": "ylalain", - "inference_platform": "unknown", - "id": "ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15052726764983576 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3975573100103517 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.004531722054380665 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28942953020134227 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3874583333333333 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.23836436170212766 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 1.357 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ymcki/ymcki_Llama-3.1-8B-GRPO-Instruct/cb38b3bb-6188-430f-b863-9bf86cc877f9.json b/leaderboard_data/HFOpenLLMv2/ymcki/ymcki_Llama-3.1-8B-GRPO-Instruct/cb38b3bb-6188-430f-b863-9bf86cc877f9.json deleted file mode 100644 index aac2b57a3294d68194be227abc95230f0fcf0e57..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ymcki/ymcki_Llama-3.1-8B-GRPO-Instruct/cb38b3bb-6188-430f-b863-9bf86cc877f9.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ymcki_Llama-3.1-8B-GRPO-Instruct/1762652580.608475", - "retrieved_timestamp": "1762652580.608476", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ymcki/Llama-3.1-8B-GRPO-Instruct", - "developer": "ymcki", - "inference_platform": "unknown", - "id": "ymcki/Llama-3.1-8B-GRPO-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.744536718130117 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5131586337530801 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.20241691842900303 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - 
"score": 0.29446308724832215 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38165625000000003 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3738364361702128 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/ymcki/ymcki_Llama-3.1-8B-SFT-GRPO-Instruct/938af657-ca9b-4400-84e1-002065f92f84.json b/leaderboard_data/HFOpenLLMv2/ymcki/ymcki_Llama-3.1-8B-SFT-GRPO-Instruct/938af657-ca9b-4400-84e1-002065f92f84.json deleted file mode 100644 index 265d38f160f2b0d1d66a93038a73c7e5f8fcd4e6..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/ymcki/ymcki_Llama-3.1-8B-SFT-GRPO-Instruct/938af657-ca9b-4400-84e1-002065f92f84.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/ymcki_Llama-3.1-8B-SFT-GRPO-Instruct/1762652580.608792", - "retrieved_timestamp": "1762652580.608793", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ymcki/Llama-3.1-8B-SFT-GRPO-Instruct", - "developer": "ymcki", - "inference_platform": "unknown", - "id": "ymcki/Llama-3.1-8B-SFT-GRPO-Instruct" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.33540007180946557 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3126261967336083 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04003021148036254 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2533557046979866 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.35260416666666666 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10979055851063829 - } - } - ], - 
"additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/yuvraj17/yuvraj17_Llama3-8B-SuperNova-Spectrum-Hermes-DPO/d22c83a1-9c1c-43df-b033-c6cb75cb389d.json b/leaderboard_data/HFOpenLLMv2/yuvraj17/yuvraj17_Llama3-8B-SuperNova-Spectrum-Hermes-DPO/d22c83a1-9c1c-43df-b033-c6cb75cb389d.json deleted file mode 100644 index 9e51af3a8d3e315de0450045ffd7d0664cc610f8..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/yuvraj17/yuvraj17_Llama3-8B-SuperNova-Spectrum-Hermes-DPO/d22c83a1-9c1c-43df-b033-c6cb75cb389d.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/yuvraj17_Llama3-8B-SuperNova-Spectrum-Hermes-DPO/1762652580.611586", - "retrieved_timestamp": "1762652580.611586", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO", - "developer": "yuvraj17", - "inference_platform": "unknown", - "id": "yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4690897928607206 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4399870586095269 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.05664652567975831 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30201342281879195 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40121875 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2634640957446808 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "LlamaForCausalLM", - "params_billions": 8.03 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/zake7749/zake7749_gemma-2-2b-it-chinese-kyara-dpo/4fbaf39a-86a1-4b79-aeeb-e14c2de64666.json b/leaderboard_data/HFOpenLLMv2/zake7749/zake7749_gemma-2-2b-it-chinese-kyara-dpo/4fbaf39a-86a1-4b79-aeeb-e14c2de64666.json deleted file mode 100644 index 
e4e908b219b01d83e587210ec4f31f5e57ffbd4e..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/zake7749/zake7749_gemma-2-2b-it-chinese-kyara-dpo/4fbaf39a-86a1-4b79-aeeb-e14c2de64666.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zake7749_gemma-2-2b-it-chinese-kyara-dpo/1762652580.612313", - "retrieved_timestamp": "1762652580.6123142", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zake7749/gemma-2-2b-it-chinese-kyara-dpo", - "developer": "zake7749", - "inference_platform": "unknown", - "id": "zake7749/gemma-2-2b-it-chinese-kyara-dpo" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5382075116247114 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4257464897414603 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08383685800604229 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.26677852348993286 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.45756250000000004 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.25731382978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 2.614 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/zelk12/zelk12_Test01012025155054/e25f6fa3-238e-4bc3-b6ce-cdc2bc728d9c.json b/leaderboard_data/HFOpenLLMv2/zelk12/zelk12_Test01012025155054/e25f6fa3-238e-4bc3-b6ce-cdc2bc728d9c.json deleted file mode 100644 index 9dab7a8f990c5890349aa1ceaed398bbf9109483..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/zelk12/zelk12_Test01012025155054/e25f6fa3-238e-4bc3-b6ce-cdc2bc728d9c.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zelk12_Test01012025155054/1762652580.6282592", - "retrieved_timestamp": "1762652580.6282601", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - 
"evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zelk12/Test01012025155054", - "developer": "zelk12", - "inference_platform": "unknown", - "id": "zelk12/Test01012025155054" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1555229014570229 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28295044895258115 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.24161073825503357 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36702083333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.10904255319148937 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Gemma2ForCausalLM", - "params_billions": 3.817 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/zetasepic/zetasepic_Qwen2.5-32B-Instruct-abliterated-v2/a5490bf2-6d11-4474-b6e5-07a79d30f431.json b/leaderboard_data/HFOpenLLMv2/zetasepic/zetasepic_Qwen2.5-32B-Instruct-abliterated-v2/a5490bf2-6d11-4474-b6e5-07a79d30f431.json deleted file mode 100644 index 5a7d2d3db0bd98777f324c5b8082c7f106c3cdd0..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/zetasepic/zetasepic_Qwen2.5-32B-Instruct-abliterated-v2/a5490bf2-6d11-4474-b6e5-07a79d30f431.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zetasepic_Qwen2.5-32B-Instruct-abliterated-v2/1762652580.6318998", - "retrieved_timestamp": "1762652580.631902", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zetasepic/Qwen2.5-32B-Instruct-abliterated-v2", - "developer": "zetasepic", - "inference_platform": "unknown", - "id": "zetasepic/Qwen2.5-32B-Instruct-abliterated-v2" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8334131216283904 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6934020817780425 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.595166163141994 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.3674496644295302 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.43542708333333335 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5621675531914894 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 32.764 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/zetasepic/zetasepic_Qwen2.5-72B-Instruct-abliterated/78799fe1-5fbd-4023-9462-8d826dac41d5.json b/leaderboard_data/HFOpenLLMv2/zetasepic/zetasepic_Qwen2.5-72B-Instruct-abliterated/78799fe1-5fbd-4023-9462-8d826dac41d5.json deleted file mode 100644 index a2ae860d1bc5b88051a816aad7668cdea2e18863..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/zetasepic/zetasepic_Qwen2.5-72B-Instruct-abliterated/78799fe1-5fbd-4023-9462-8d826dac41d5.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zetasepic_Qwen2.5-72B-Instruct-abliterated/1762652580.632342", - "retrieved_timestamp": "1762652580.632343", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zetasepic/Qwen2.5-72B-Instruct-abliterated", - "developer": "zetasepic", - "inference_platform": "unknown", - "id": "zetasepic/Qwen2.5-72B-Instruct-abliterated" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7152610628687439 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7152257183282452 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5241691842900302 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.40687919463087246 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4719166666666667 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5871841755319149 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "Qwen2ForCausalLM", - "params_billions": 72.706 - } -} \ No newline at end of file diff --git a/leaderboard_data/HFOpenLLMv2/zhengr/zhengr_MixTAO-7Bx2-MoE-v8.1/35068575-06a3-4541-bdf3-120bd6db2867.json b/leaderboard_data/HFOpenLLMv2/zhengr/zhengr_MixTAO-7Bx2-MoE-v8.1/35068575-06a3-4541-bdf3-120bd6db2867.json deleted file mode 100644 index ea9229fd607fa25d8ca6fbae95ab29fc4fefc4a7..0000000000000000000000000000000000000000 --- a/leaderboard_data/HFOpenLLMv2/zhengr/zhengr_MixTAO-7Bx2-MoE-v8.1/35068575-06a3-4541-bdf3-120bd6db2867.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "hfopenllm_v2/zhengr_MixTAO-7Bx2-MoE-v8.1/1762652580.6327481", - "retrieved_timestamp": "1762652580.632749", - "source_data": [ - "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted" - ], - "evaluation_source": { - "evaluation_source_name": "HF Open LLM v2", - "evaluation_source_type": "leaderboard" - }, - "source_metadata": { - "source_organization_name": "Hugging Face", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "zhengr/MixTAO-7Bx2-MoE-v8.1", - "developer": "zhengr", - "inference_platform": "unknown", - "id": "zhengr/MixTAO-7Bx2-MoE-v8.1" - }, - "evaluation_results": [ - { - "evaluation_name": "IFEval", - "metric_config": { - "evaluation_description": "Accuracy on IFEval", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4187810564856802 - } - }, - { - "evaluation_name": "BBH", - "metric_config": { - "evaluation_description": "Accuracy on BBH", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.42019437560239653 - } - }, - { - "evaluation_name": "MATH Level 5", - "metric_config": { - "evaluation_description": "Exact Match on MATH Level 5", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.06042296072507553 - } - }, - { - "evaluation_name": "GPQA", - "metric_config": { - "evaluation_description": "Accuracy on GPQA", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2986577181208054 - } - }, - { - "evaluation_name": "MUSR", - "metric_config": { - "evaluation_description": "Accuracy on MUSR", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 
0.39762499999999995 - } - }, - { - "evaluation_name": "MMLU-PRO", - "metric_config": { - "evaluation_description": "Accuracy on MMLU-PRO", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28465757978723405 - } - } - ], - "additional_details": { - "precision": "bfloat16", - "architecture": "MixtralForCausalLM", - "params_billions": 12.879 - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/Alibaba/alibaba_qwen3-235b-a22b-thinking-2507/126326f3-6521-45d1-aa14-5c51335c1929.json b/leaderboard_data/LiveCodeBenchPro/Alibaba/alibaba_qwen3-235b-a22b-thinking-2507/126326f3-6521-45d1-aa14-5c51335c1929.json deleted file mode 100644 index fda7bc448b6877aee9ab36e869f054d159a97087..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/Alibaba/alibaba_qwen3-235b-a22b-thinking-2507/126326f3-6521-45d1-aa14-5c51335c1929.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/qwen3-235b-a22b-thinking-2507/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qwen3-235b-a22b-thinking-2507", - "developer": "Alibaba", - "inference_platform": "aliyun", - "id": "alibaba/qwen3-235b-a22b-thinking-2507" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1267605633802817 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7605633802816901 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/Alibaba/alibaba_qwen3-30b-a3b/b3f5937a-1489-417b-8162-6c62dea0703d.json b/leaderboard_data/LiveCodeBenchPro/Alibaba/alibaba_qwen3-30b-a3b/b3f5937a-1489-417b-8162-6c62dea0703d.json deleted file mode 100644 index 522836f0810445b2aedc7f331900eb33d896aa93..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/Alibaba/alibaba_qwen3-30b-a3b/b3f5937a-1489-417b-8162-6c62dea0703d.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/qwen3-30b-a3b/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { 
- "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qwen3-30b-a3b", - "developer": "Alibaba", - "inference_platform": "aliyun", - "id": "alibaba/qwen3-30b-a3b" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028169014084507043 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5774647887323944 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/Alibaba/alibaba_qwen3-max/f06d6c4c-b2c4-4c48-9702-f0bf08af62c4.json b/leaderboard_data/LiveCodeBenchPro/Alibaba/alibaba_qwen3-max/f06d6c4c-b2c4-4c48-9702-f0bf08af62c4.json deleted file mode 100644 index 46285af90d8dd02867110925e41c5cf695ab5cb8..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/Alibaba/alibaba_qwen3-max/f06d6c4c-b2c4-4c48-9702-f0bf08af62c4.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/alibaba/qwen3-max/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "alibaba/qwen3-max", - "developer": "Alibaba", - "inference_platform": "openrouter", - "id": "alibaba/qwen3-max" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04225352112676056 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.36619718309859156 - } - } - ], - 
"source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/Alibaba/alibaba_qwen3-next-80b-a3b-thinking/809a1503-a161-4532-afd3-fdbd6551eb63.json b/leaderboard_data/LiveCodeBenchPro/Alibaba/alibaba_qwen3-next-80b-a3b-thinking/809a1503-a161-4532-afd3-fdbd6551eb63.json deleted file mode 100644 index 966a77e862484373e59895588b90333c4c1e0bb2..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/Alibaba/alibaba_qwen3-next-80b-a3b-thinking/809a1503-a161-4532-afd3-fdbd6551eb63.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/qwen3-next-80b-a3b-thinking/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "qwen3-next-80b-a3b-thinking", - "developer": "Alibaba", - "inference_platform": "aliyun", - "id": "alibaba/qwen3-next-80b-a3b-thinking" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.14084507042253522 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7464788732394366 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/Anthropic/anthropic_claude-3-7-sonnet-20250219/be076445-eb88-49b0-a855-2e0cb1551bab.json b/leaderboard_data/LiveCodeBenchPro/Anthropic/anthropic_claude-3-7-sonnet-20250219/be076445-eb88-49b0-a855-2e0cb1551bab.json deleted file mode 100644 index 215719be2efb9b8d8f629342258c0b0fe16a8ea0..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/Anthropic/anthropic_claude-3-7-sonnet-20250219/be076445-eb88-49b0-a855-2e0cb1551bab.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": 
"livecodebenchpro/claude-3-7-sonnet-20250219/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "claude-3-7-sonnet-20250219", - "developer": "Anthropic", - "inference_platform": "anthropic", - "id": "anthropic/claude-3-7-sonnet-20250219" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.28169014084507044 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/Anthropic/anthropic_claude-3.7-sonnet/69210faf-04a8-46d4-b92b-94f2ca521c09.json b/leaderboard_data/LiveCodeBenchPro/Anthropic/anthropic_claude-3.7-sonnet/69210faf-04a8-46d4-b92b-94f2ca521c09.json deleted file mode 100644 index 185d092d50e66c714769456a840bc0d93fe34947..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/Anthropic/anthropic_claude-3.7-sonnet/69210faf-04a8-46d4-b92b-94f2ca521c09.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/anthropic/claude-3.7-sonnet/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "anthropic/claude-3.7-sonnet", - "developer": "Anthropic", - "inference_platform": "openrouter", - "id": "anthropic/claude-3.7-sonnet" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014084507042253521 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - 
"evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.15492957746478872 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/ByteDance/bytedance_doubao-seed-1-6-thinking-250615/bfd991ca-13e9-4716-b389-11e0d2afe286.json b/leaderboard_data/LiveCodeBenchPro/ByteDance/bytedance_doubao-seed-1-6-thinking-250615/bfd991ca-13e9-4716-b389-11e0d2afe286.json deleted file mode 100644 index 4ad5d21433bbeaf36ce0abf4ea5792f672244cbe..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/ByteDance/bytedance_doubao-seed-1-6-thinking-250615/bfd991ca-13e9-4716-b389-11e0d2afe286.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/doubao-seed-1-6-thinking-250615/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "doubao-seed-1-6-thinking-250615", - "developer": "ByteDance", - "inference_platform": "ark", - "id": "bytedance/doubao-seed-1-6-thinking-250615" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07042253521126761 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5774647887323944 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/DeepSeek/deepseek_chat-v3-0324/b29b7c8e-759e-45fe-a9d3-1054f19af617.json b/leaderboard_data/LiveCodeBenchPro/DeepSeek/deepseek_chat-v3-0324/b29b7c8e-759e-45fe-a9d3-1054f19af617.json deleted file mode 100644 index 6ebf8ea9ad12f5e610522dc50b56744a51d4d59e..0000000000000000000000000000000000000000 --- 
a/leaderboard_data/LiveCodeBenchPro/DeepSeek/deepseek_chat-v3-0324/b29b7c8e-759e-45fe-a9d3-1054f19af617.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/deepseek/chat-v3-0324/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "deepseek/chat-v3-0324", - "developer": "DeepSeek", - "inference_platform": "openrouter", - "id": "deepseek/chat-v3-0324" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19718309859154928 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/DeepSeek/deepseek_ep-20250214004308-p7n89/801d2dc6-17e7-47f1-a54f-87b94a59b508.json b/leaderboard_data/LiveCodeBenchPro/DeepSeek/deepseek_ep-20250214004308-p7n89/801d2dc6-17e7-47f1-a54f-87b94a59b508.json deleted file mode 100644 index 56bc33aa9bdb63a86b1cbcd6b7f1598d8414d6a4..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/DeepSeek/deepseek_ep-20250214004308-p7n89/801d2dc6-17e7-47f1-a54f-87b94a59b508.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/ep-20250214004308-p7n89/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ep-20250214004308-p7n89", - "developer": "DeepSeek", - "inference_platform": "ark", - "id": "deepseek/ep-20250214004308-p7n89" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - 
"min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014084507042253521 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4225352112676056 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/DeepSeek/deepseek_ep-20250228232227-z44x5/def0b2e3-cf5f-4dfd-8f1c-827f98d1626a.json b/leaderboard_data/LiveCodeBenchPro/DeepSeek/deepseek_ep-20250228232227-z44x5/def0b2e3-cf5f-4dfd-8f1c-827f98d1626a.json deleted file mode 100644 index 22b473b2eb7f040f18a67cd1df27073dcd2230c3..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/DeepSeek/deepseek_ep-20250228232227-z44x5/def0b2e3-cf5f-4dfd-8f1c-827f98d1626a.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/ep-20250228232227-z44x5/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ep-20250228232227-z44x5", - "developer": "DeepSeek", - "inference_platform": "ark", - "id": "deepseek/ep-20250228232227-z44x5" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1267605633802817 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/DeepSeek/deepseek_ep-20250603132404-cgpjm/157dd68b-fcc2-416f-a2c0-c9781020e6af.json b/leaderboard_data/LiveCodeBenchPro/DeepSeek/deepseek_ep-20250603132404-cgpjm/157dd68b-fcc2-416f-a2c0-c9781020e6af.json deleted file mode 100644 index 
3d538e382b18f4a67b1cebac2aab40c268bdaec2..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/DeepSeek/deepseek_ep-20250603132404-cgpjm/157dd68b-fcc2-416f-a2c0-c9781020e6af.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/ep-20250603132404-cgpjm/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "ep-20250603132404-cgpjm", - "developer": "DeepSeek", - "inference_platform": "ark", - "id": "deepseek/ep-20250603132404-cgpjm" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.08450704225352113 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5774647887323944 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/Google/google_gemini-2.5-flash/174f0e23-84f1-43d0-bcdf-11b83c37025a.json b/leaderboard_data/LiveCodeBenchPro/Google/google_gemini-2.5-flash/174f0e23-84f1-43d0-bcdf-11b83c37025a.json deleted file mode 100644 index c1089bbfc25d62772b3126b722a2cd609584c08f..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/Google/google_gemini-2.5-flash/174f0e23-84f1-43d0-bcdf-11b83c37025a.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/google/gemini-2.5-flash/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "google/gemini-2.5-flash", - "developer": "Google", - "inference_platform": "openrouter", - "id": "google/gemini-2.5-flash" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on 
Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028169014084507043 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.38028169014084506 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/Google/google_gemini-2.5-pro/bef7254b-549f-4e6b-b5c8-31b84dc6acda.json b/leaderboard_data/LiveCodeBenchPro/Google/google_gemini-2.5-pro/bef7254b-549f-4e6b-b5c8-31b84dc6acda.json deleted file mode 100644 index be0116d428683115b051d2321a63d42eec4fb749..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/Google/google_gemini-2.5-pro/bef7254b-549f-4e6b-b5c8-31b84dc6acda.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/gemini-2.5-pro/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gemini-2.5-pro", - "developer": "Google", - "inference_platform": "google", - "id": "google/gemini-2.5-pro" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014084507042253521 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.2112676056338028 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7183098591549296 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/Kuaishou/kuaishou_kwaipilot-40b-0604/aa236b03-b81f-431b-b049-7101cea165f2.json b/leaderboard_data/LiveCodeBenchPro/Kuaishou/kuaishou_kwaipilot-40b-0604/aa236b03-b81f-431b-b049-7101cea165f2.json deleted 
file mode 100644 index aca812eb7ad7e2eeb88bf99cb16c3571eb195f73..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/Kuaishou/kuaishou_kwaipilot-40b-0604/aa236b03-b81f-431b-b049-7101cea165f2.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/kwaipilot-40b-0604/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "kwaipilot-40b-0604", - "developer": "Kuaishou", - "inference_platform": "kuaishou", - "id": "kuaishou/kwaipilot-40b-0604" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07042253521126761 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.056338028169014086 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/Meta/meta_llama-4-maverick/abc37028-a362-4e02-8499-1bb7497e0293.json b/leaderboard_data/LiveCodeBenchPro/Meta/meta_llama-4-maverick/abc37028-a362-4e02-8499-1bb7497e0293.json deleted file mode 100644 index 0e1db4f63394eb60d449b11733c292573c6b703c..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/Meta/meta_llama-4-maverick/abc37028-a362-4e02-8499-1bb7497e0293.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/meta/llama-4-maverick/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "meta/llama-4-maverick", - "developer": "Meta", - "inference_platform": "openrouter", - "id": "meta/llama-4-maverick" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium 
Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.09859154929577464 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_gpt-4.1/ba46ef91-d157-4984-b3df-ce33d8d97f8e.json b/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_gpt-4.1/ba46ef91-d157-4984-b3df-ce33d8d97f8e.json deleted file mode 100644 index 89c70ef642f977a4c0d71021acc6d4f7c3d3c4d5..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_gpt-4.1/ba46ef91-d157-4984-b3df-ce33d8d97f8e.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/openai/gpt-4.1/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "openai/gpt-4.1", - "developer": "OpenAI", - "inference_platform": "openrouter", - "id": "openai/gpt-4.1" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.19718309859154928 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_gpt-4o-2024-11-20/e70acf51-30ef-4c20-b7cc-51704d114d70.json b/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_gpt-4o-2024-11-20/e70acf51-30ef-4c20-b7cc-51704d114d70.json deleted file mode 100644 index 
a858bdd3c61a91d1a28735b5d31f93c612e4b76d..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_gpt-4o-2024-11-20/e70acf51-30ef-4c20-b7cc-51704d114d70.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/openai/gpt-4o-2024-11-20/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "openai/gpt-4o-2024-11-20", - "developer": "OpenAI", - "inference_platform": "openrouter", - "id": "openai/gpt-4o-2024-11-20" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.07042253521126761 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_gpt-5-2025-08-07/0e57aa1f-48c6-42b7-9aee-43a29d21b83f.json b/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_gpt-5-2025-08-07/0e57aa1f-48c6-42b7-9aee-43a29d21b83f.json deleted file mode 100644 index 61b5691c69d3e6eb6a2af9df53b8048b81df83a8..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_gpt-5-2025-08-07/0e57aa1f-48c6-42b7-9aee-43a29d21b83f.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/gpt-5-2025-08-07/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "gpt-5-2025-08-07", - "developer": "OpenAI", - "inference_platform": "openai", - "id": "openai/gpt-5-2025-08-07" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.04225352112676056 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - 
"lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.4084507042253521 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8873239436619719 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_gpt-oss-120b/1dd8c827-72af-4c8f-9ead-989de7105590.json b/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_gpt-oss-120b/1dd8c827-72af-4c8f-9ead-989de7105590.json deleted file mode 100644 index a0386d17236476214e59d375f24c9f3c33d9d729..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_gpt-oss-120b/1dd8c827-72af-4c8f-9ead-989de7105590.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/openai/gpt-oss-120b/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "openai/gpt-oss-120b", - "developer": "OpenAI", - "inference_platform": "openrouter", - "id": "openai/gpt-oss-120b" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.11267605633802817 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.6619718309859155 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_gpt-oss-20b/ead39f61-b408-42b2-808f-8421a3200c89.json b/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_gpt-oss-20b/ead39f61-b408-42b2-808f-8421a3200c89.json deleted file mode 100644 index 
c847cc099016e2f6848c252eb3dea120ffc59960..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_gpt-oss-20b/ead39f61-b408-42b2-808f-8421a3200c89.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/openai/gpt-oss-20b/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "openai/gpt-oss-20b", - "developer": "OpenAI", - "inference_platform": "openrouter", - "id": "openai/gpt-oss-20b" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.056338028169014086 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.5070422535211268 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_o3-2025-04-16/f96bdb35-4d61-4fde-8d91-edf55f13dc03.json b/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_o3-2025-04-16/f96bdb35-4d61-4fde-8d91-edf55f13dc03.json deleted file mode 100644 index b06122526f98e86e31b0dd486cc9e9de8a35a1ce..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_o3-2025-04-16/f96bdb35-4d61-4fde-8d91-edf55f13dc03.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/o3-2025-04-16/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "o3-2025-04-16", - "developer": "OpenAI", - "inference_platform": "openai", - "id": "openai/o3-2025-04-16" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": 
"continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.22535211267605634 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.7183098591549296 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_o4-mini-2025-04-16/8992cef5-df7e-40a1-b099-331532c3deb0.json b/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_o4-mini-2025-04-16/8992cef5-df7e-40a1-b099-331532c3deb0.json deleted file mode 100644 index a5947563f2b6ee1234e876a358e37182e84207d6..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/OpenAI/openai_o4-mini-2025-04-16/8992cef5-df7e-40a1-b099-331532c3deb0.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/o4-mini-2025-04-16/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "o4-mini-2025-04-16", - "developer": "OpenAI", - "inference_platform": "openai", - "id": "openai/o4-mini-2025-04-16" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.014084507042253521 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.30985915492957744 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.8873239436619719 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/leaderboard_data/LiveCodeBenchPro/Z.AI/z-ai_glm-4.5/a77c08d6-a782-440c-b545-c60b6169712d.json b/leaderboard_data/LiveCodeBenchPro/Z.AI/z-ai_glm-4.5/a77c08d6-a782-440c-b545-c60b6169712d.json deleted file mode 100644 index 
e004ebec1c55a23da8bcf992965af53873c9ea63..0000000000000000000000000000000000000000 --- a/leaderboard_data/LiveCodeBenchPro/Z.AI/z-ai_glm-4.5/a77c08d6-a782-440c-b545-c60b6169712d.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "schema_version": "0.0.1", - "evaluation_id": "livecodebenchpro/z-ai/glm-4.5/1760492095.8105888", - "retrieved_timestamp": "1760492095.8105888", - "source_metadata": { - "source_organization_name": "New York University, Princeton University, University of California San Diego, University of Washington and Canyon Crest Academy", - "evaluator_relationship": "third_party" - }, - "model_info": { - "name": "z-ai/glm-4.5", - "developer": "Z.AI", - "inference_platform": "openrouter", - "id": "z-ai/glm-4.5" - }, - "evaluation_results": [ - { - "evaluation_name": "Hard Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Hard Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.0 - } - }, - { - "evaluation_name": "Medium Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Medium Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.028169014084507043 - } - }, - { - "evaluation_name": "Easy Problems", - "metric_config": { - "evaluation_description": "Pass@1 on Easy Problems", - "lower_is_better": false, - "score_type": "continuous", - "min_score": 0, - "max_score": 1 - }, - "score_details": { - "score": 0.1267605633802817 - } - } - ], - "source_data": [ - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=hard&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=medium&benchmark_mode=live", - "https://webhook.cp-bench.orzzh.com/leaderboard/llm/difficulty?difficulty=easy&benchmark_mode=live" - ], - "evaluation_source": { - "evaluation_source_name": "Live Code Bench Pro", - "evaluation_source_type": "leaderboard" - } -} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..817ee6ed08464a453409ce0a8a6b7bc5293f8841 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "eee-test" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "gradio>=5.49.1", + "pandas>=2.3.2", +] diff --git a/scripts/convert_to_parquet.py b/scripts/convert_to_parquet.py new file mode 100644 index 0000000000000000000000000000000000000000..6fd401996506a5d00249ec4cd90f1f73b4d28b1f --- /dev/null +++ b/scripts/convert_to_parquet.py @@ -0,0 +1,142 @@ +""" +Incremental parquet conversion with HuggingFace sync. + +Optimized workflow: +1. Detect changed leaderboards via git diff (instant!) +2. Download ONLY changed parquets from HF (fast!) +3. Re-convert ONLY changed leaderboards +4. Ready for upload (handled by upload_to_hf.py) + +This avoids downloading and processing unchanged leaderboards. 
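+
+Typical invocation (a hypothetical local run; assumes HF_DATASET_REPO is set in
+the environment, plus HF_TOKEN if the dataset repo is private):
+
+    python scripts/convert_to_parquet.py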
+""" + +from pathlib import Path +import sys +import subprocess +import os +import json +from datasets import load_dataset + +sys.path.insert(0, str(Path(__file__).parent.resolve().parent)) + +from json_to_parquet import add_to_parquet + +HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "deepmage121/eee_test") + +def download_leaderboards(output_dir: Path, leaderboard_names: set[str]) -> set[str]: + """Download existing leaderboard parquets from HuggingFace.""" + try: + dataset_dict = load_dataset(HF_DATASET_REPO) + downloaded: set[str] = set() + + for lb in leaderboard_names: + if lb in dataset_dict: + print(f" Downloading {lb}") + dataset_dict[lb].to_pandas().to_parquet(output_dir / f"{lb}.parquet", index=False) + downloaded.add(lb) + else: + print(f" {lb} (new)") + + print(f"Downloaded {len(downloaded)}/{len(leaderboard_names)} parquet(s)") + return downloaded + + except Exception as e: + print(f"HF download failed: {e}") + sys.exit(1) + + +def detect_modified_leaderboards() -> set[str]: + """Get leaderboards with changed JSONs via git diff (HEAD~1).""" + try: + result = subprocess.run( + ["git", "diff", "--name-only", "HEAD~1", "HEAD", "data/"], + capture_output=True, text=True, check=True + ) + + changed_files = result.stdout.strip().split('\n') + if not changed_files or changed_files == ['']: + print("No changes detected in data/") + return set() + + leaderboards = { + Path(f).parts[1] + for f in changed_files + if f.startswith('data/') and f.endswith('.json') and len(Path(f).parts) >= 2 + } + return leaderboards + + except subprocess.CalledProcessError as e: + print(f"ERROR: Git command failed: {e}") + sys.exit(1) + + +def convert_changed_leaderboards(): + """ + Optimized conversion: detect changes, download only changed, re-convert only changed. 
+ """ + + data_dir = Path("data") + output_dir = Path("parquet_output") + output_dir.mkdir(exist_ok=True) + + if not data_dir.exists(): + print(f"Data directory not found: {data_dir}") + sys.exit(1) + + changed_leaderboards: set[str] = detect_modified_leaderboards() + + if len(changed_leaderboards) == 0: + print("No changes detected, nothing to upload") + manifest = {"changed": [], "converted": []} + (output_dir / "changed_leaderboards.json").write_text(json.dumps(manifest, indent=2)) + sys.exit(0) + + print(f"Detected {len(changed_leaderboards)} changed leaderboard(s):") + for lb in changed_leaderboards: + print(f" {lb}") + + downloaded = download_leaderboards(output_dir, changed_leaderboards) + + converted_count = 0 + error_count = 0 + converted_leaderboards = [] + + for leaderboard_name in changed_leaderboards: + leaderboard_dir = os.path.join(data_dir, leaderboard_name) + + parquet_path = os.path.join(output_dir, f"{leaderboard_name}.parquet") + + print(f"\nConverting: {leaderboard_name}") + + try: + add_to_parquet(json_or_folder=str(leaderboard_dir), parquet_file=str(parquet_path)) + + print(f" Converted to {parquet_path}") + converted_count += 1 + converted_leaderboards.append(leaderboard_name) + + except Exception as e: + print(f" Error: {e}") + error_count += 1 + + manifest = { + "changed": list(changed_leaderboards), + "converted": converted_leaderboards, + "downloaded": list(downloaded), + "errors": error_count + } + manifest_path = os.path.join(output_dir, "changed_leaderboards.json") + with open(manifest_path, 'w') as f: + json.dump(manifest, f, indent=2) + + if error_count > 0: + sys.exit(1) + + if converted_count == 0: + print("Warning: No parquet files successfully converted!") + sys.exit(1) + + +if __name__ == "__main__": + convert_changed_leaderboards() + diff --git a/scripts/json_to_parquet.py b/scripts/json_to_parquet.py new file mode 100644 index 0000000000000000000000000000000000000000..b12fbf8a57500e8e5a2e0824effe2e9f177daba0 --- /dev/null +++ b/scripts/json_to_parquet.py @@ -0,0 +1,222 @@ +""" +Convert evaluation JSONs to Parquet for HF Datasets. 
+"""
+
+import json
+from pathlib import Path
+import pandas as pd
+
+
+def json_to_row(json_path: Path) -> dict:
+    """Convert one JSON to a single row (1 JSON = 1 row, evaluations as columns)."""
+    with open(json_path, 'r') as f:
+        data = json.load(f)
+
+    required_fields = ["schema_version", "evaluation_id", "evaluation_source", "retrieved_timestamp",
+                       "source_data", "source_metadata", "model_info", "evaluation_results"]
+    for field in required_fields:
+        if field not in data:
+            raise ValueError(f"{json_path}: Missing required field '{field}'")
+
+    if "evaluation_source_name" not in data["evaluation_source"]:
+        raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_name'")
+    if "evaluation_source_type" not in data["evaluation_source"]:
+        raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_type'")
+
+    if "source_organization_name" not in data["source_metadata"]:
+        raise ValueError(f"{json_path}: Missing required field 'source_metadata.source_organization_name'")
+    if "evaluator_relationship" not in data["source_metadata"]:
+        raise ValueError(f"{json_path}: Missing required field 'source_metadata.evaluator_relationship'")
+
+    if "name" not in data["model_info"]:
+        raise ValueError(f"{json_path}: Missing required field 'model_info.name'")
+    if "id" not in data["model_info"]:
+        raise ValueError(f"{json_path}: Missing required field 'model_info.id'")
+    if "developer" not in data["model_info"]:
+        raise ValueError(f"{json_path}: Missing required field 'model_info.developer'")
+
+    leaderboard = data["evaluation_source"]["evaluation_source_name"]
+    model = data["model_info"]["id"]
+    uuid = json_path.stem
+    developer = data["model_info"]["developer"]
+
+    # Validate evaluation results
+    for eval_result in data["evaluation_results"]:
+        if "evaluation_name" not in eval_result:
+            raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].evaluation_name'")
+        if "metric_config" not in eval_result:
+            raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config'")
+        if "score_details" not in eval_result:
+            raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details'")
+
+        if "lower_is_better" not in eval_result["metric_config"]:
+            raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config.lower_is_better'")
+        if "score" not in eval_result["score_details"]:
+            raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details.score'")
+
+    row = {
+        # Folder structure (for reconstruction)
+        "_leaderboard": leaderboard,
+        "_developer": developer,
+        "_model": model,
+        "_uuid": uuid,
+
+        # Required top-level fields
+        "schema_version": data["schema_version"],
+        "evaluation_id": data["evaluation_id"],
+        "retrieved_timestamp": data["retrieved_timestamp"],
+        "source_data": json.dumps(data["source_data"]),
+
+        # Required nested fields
+        "evaluation_source_name": data["evaluation_source"]["evaluation_source_name"],
+        "evaluation_source_type": data["evaluation_source"]["evaluation_source_type"],
+
+        "source_organization_name": data["source_metadata"]["source_organization_name"],
+        "source_organization_url": data["source_metadata"].get("source_organization_url"),
+        "source_organization_logo_url": data["source_metadata"].get("source_organization_logo_url"),
+        "evaluator_relationship": data["source_metadata"]["evaluator_relationship"],
+
+        "model_name": data["model_info"]["name"],
+        "model_id": data["model_info"]["id"],
+        "model_developer": data["model_info"]["developer"],
+        "model_inference_platform": data["model_info"].get("inference_platform"),
+
+        # Store full evaluation_results and additional_details as JSON
+        "evaluation_results": json.dumps(data["evaluation_results"]),
+        "additional_details": json.dumps(data["additional_details"]) if "additional_details" in data else None,
+    }
+
+    return row
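+
+# Illustrative shape of one produced row (values shortened, not real output):
+# nested fields are flattened and list/dict fields are stored as JSON strings.
+#   {"_leaderboard": "Live Code Bench Pro", "_developer": "OpenAI",
+#    "_model": "openai/gpt-4.1", "_uuid": "ba46ef91-...",
+#    "evaluation_results": "[{\"evaluation_name\": \"Hard Problems\", ...}]"}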
data["source_metadata"]["evaluator_relationship"], + + "model_name": data["model_info"]["name"], + "model_id": data["model_info"]["id"], + "model_developer": data["model_info"]["developer"], + "model_inference_platform": data["model_info"].get("inference_platform"), + + # Store full evaluation_results and additional_details as JSON + "evaluation_results": json.dumps(data["evaluation_results"]), + "additional_details": json.dumps(data["additional_details"]) if "additional_details" in data else None, + } + + return row + + +def add_to_parquet(json_input: str, parquet_file: str): + """ + Add JSON(s) to Parquet file. + Creates new file if it doesn't exist, appends and deduplicates if it does. + + Args: + json_input: Path to single JSON file or folder containing JSONs + parquet_file: Output Parquet file path + """ + input_path = Path(json_input) + + if input_path.is_file(): + json_files = [input_path] + elif input_path.is_dir(): + json_files = list(input_path.rglob("*.json")) + else: + raise ValueError(f"Invalid input: {json_input}") + + print(f"Processing {len(json_files)} JSON file(s)...") + + parquet_path = Path(parquet_file) + if parquet_path.exists(): + existing_df = pd.read_parquet(parquet_file) + existing_keys = set( + existing_df[["_leaderboard", "_developer", "_model", "_uuid"]] + .apply(tuple, axis=1) + ) + print(f"Found {len(existing_df)} existing rows") + else: + existing_df = None + existing_keys = set() + + all_rows = [] + skipped = 0 + for i, jf in enumerate(json_files, 1): + if i % 100 == 0: + print(f" {i}/{len(json_files)}") + + row = json_to_row(jf) + key = (row["_leaderboard"], row["_developer"], row["_model"], row["_uuid"]) + if key not in existing_keys: + all_rows.append(row) + existing_keys.add(key) + else: + skipped += 1 + + if skipped > 0: + print(f" Skipped {skipped} duplicate file(s)") + + new_df = pd.DataFrame(all_rows) + + if existing_df is not None: + df = pd.concat([existing_df, new_df], ignore_index=True) + print(f"Added {len(new_df)} new file(s) to existing {len(existing_df)} file(s)") + else: + df = new_df + + df.to_parquet(parquet_file, index=False) + print(f"Saved {len(df)} total file(s) to {parquet_file} ({parquet_path.stat().st_size / 1024 / 1024:.1f} MB)") + + +def parquet_to_folder(parquet_file: str, output_dir: str): + """Reconstruct folder structure from Parquet.""" + df = pd.read_parquet(parquet_file) + out = Path(output_dir) + + for _, row in df.iterrows(): + lb = row["_leaderboard"] + dev = row["_developer"] + model = row["_model"] + uuid = row["_uuid"] + + json_data = { + "schema_version": row["schema_version"], + "evaluation_id": row["evaluation_id"], + "retrieved_timestamp": row["retrieved_timestamp"], + "source_data": json.loads(row["source_data"]), + "evaluation_source": { + "evaluation_source_name": row["evaluation_source_name"], + "evaluation_source_type": row["evaluation_source_type"] + }, + "source_metadata": { + "source_organization_name": row["source_organization_name"], + "evaluator_relationship": row["evaluator_relationship"] + }, + "model_info": { + "name": row["model_name"], + "id": row["model_id"], + "developer": row["model_developer"] + }, + "evaluation_results": json.loads(row["evaluation_results"]) + } + + if pd.notna(row["source_organization_url"]): + json_data["source_metadata"]["source_organization_url"] = row["source_organization_url"] + if pd.notna(row["source_organization_logo_url"]): + json_data["source_metadata"]["source_organization_logo_url"] = row["source_organization_logo_url"] + + if 
pd.notna(row["model_inference_platform"]): + json_data["model_info"]["inference_platform"] = row["model_inference_platform"] + + if pd.notna(row["additional_details"]): + json_data["additional_details"] = json.loads(row["additional_details"]) + + file_path = out / lb / dev / model / f"{uuid}.json" + file_path.parent.mkdir(parents=True, exist_ok=True) + with open(file_path, 'w') as f: + json.dump(json_data, f, indent=2) + + print(f"Reconstructed {len(df)} files to {output_dir}") + + +if __name__ == "__main__": + import sys + + if len(sys.argv) < 2: + print("Usage:") + print(" python json_to_parquet.py add ") + print(" python json_to_parquet.py export ") + sys.exit(1) + + cmd = sys.argv[1] + + if cmd == "add": + add_to_parquet(sys.argv[2], sys.argv[3]) + elif cmd == "export": + parquet_to_folder(sys.argv[2], sys.argv[3]) + else: + print(f"Unknown command: {cmd}") diff --git a/ui_components.py b/ui_components.py new file mode 100644 index 0000000000000000000000000000000000000000..62097bd539b6df28c27973ddce2264fc70df5e1b --- /dev/null +++ b/ui_components.py @@ -0,0 +1,211 @@ +""" +UI Components: Themes, CSS, and HTML formatters for the Gradio interface. +""" +import gradio as gr + + +def get_theme(): + """Returns the custom Gradio theme.""" + return gr.themes.Soft( + primary_hue="slate", + neutral_hue="slate", + font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"] + ).set( + body_background_fill="var(--neutral-50)", + block_background_fill="white", + block_border_width="1px", + block_title_text_weight="600" + ) + + +def get_custom_css(): + """Returns custom CSS for the interface.""" + return """ +/* Clean up the global container */ +.gradio-container { + max-width: 100% !important; + padding: 0 2rem !important; +} + +/* Hide file list in uploaders */ +.file-preview { + display: none !important; +} + +/* Ensure details elements work independently */ +details { + position: relative; + isolation: isolate; +} + +details summary { + cursor: pointer; +} +""" + + +def format_leaderboard_header(selected_leaderboard, metadata): + """Formats the leaderboard header info (goes at top).""" + if not selected_leaderboard: + return """ +
diff --git a/ui_components.py b/ui_components.py
new file mode 100644
index 0000000000000000000000000000000000000000..62097bd539b6df28c27973ddce2264fc70df5e1b
--- /dev/null
+++ b/ui_components.py
@@ -0,0 +1,211 @@
+"""
+UI Components: Themes, CSS, and HTML formatters for the Gradio interface.
+"""
+import gradio as gr
+
+
+def get_theme():
+    """Returns the custom Gradio theme."""
+    return gr.themes.Soft(
+        primary_hue="slate",
+        neutral_hue="slate",
+        font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"]
+    ).set(
+        body_background_fill="var(--neutral-50)",
+        block_background_fill="white",
+        block_border_width="1px",
+        block_title_text_weight="600"
+    )
+
+
+def get_custom_css():
+    """Returns custom CSS for the interface."""
+    return """
+/* Clean up the global container */
+.gradio-container {
+    max-width: 100% !important;
+    padding: 0 2rem !important;
+}
+
+/* Hide file list in uploaders */
+.file-preview {
+    display: none !important;
+}
+
+/* Ensure details elements work independently */
+details {
+    position: relative;
+    isolation: isolate;
+}
+
+details summary {
+    cursor: pointer;
+}
+"""
+def format_leaderboard_header(selected_leaderboard, metadata):
+    """Formats the leaderboard header info (goes at top)."""
+    if not selected_leaderboard:
+        return """
+        <div style="text-align: center; padding: 2rem;">
+            <div style="font-size: 1.25rem; font-weight: 600;">
+                👋 Welcome to Eval Leaderboard
+            </div>
+            <div>
+                Select a leaderboard above to visualize results and metadata.
+            </div>
+        </div>
+        """
+
+    if not metadata or not metadata.get("evals"):
+        return f"""<div>No metadata found for {selected_leaderboard}</div>"""
+
+    source_info = metadata.get("source_info", {})
+    org = source_info.get("organization", "Unknown")
+    relationship = source_info.get("relationship", "Unknown").replace("_", " ").title()
+    url = source_info.get("url", "#")
+    eval_names = list(metadata["evals"].keys())
+
+    # Create badges for evaluations
+    eval_badges = "".join([f"""
+        <span style="display: inline-block; padding: 0.15rem 0.6rem; margin: 0.1rem; border: 1px solid var(--neutral-300); border-radius: 999px;">{name}</span>
+    """ for name in eval_names])
+
+    return f"""
+    <div style="padding: 1rem; border: 1px solid var(--neutral-200); border-radius: 8px;">
+        <div style="display: flex; justify-content: space-between; align-items: flex-start;">
+            <div>
+                <div style="font-size: 1.25rem; font-weight: 600;">
+                    {selected_leaderboard}
+                </div>
+                <div>
+                    <strong>Source Organization:</strong> {org} •
+                    <strong>Evaluator Relationship:</strong> {relationship}
+                </div>
+                <div>
+                    <strong>Included Evaluations:</strong>
+                </div>
+                <div>
+                    {eval_badges}
+                </div>
+            </div>
+            <a href="{url}" target="_blank">
+                Source
+            </a>
+        </div>
+    </div>
+    """
+
+
+def format_metric_details(selected_leaderboard, metadata):
+    """Formats metric detail cards (goes below table)."""
+    if not selected_leaderboard or not metadata or not metadata.get("evals"):
+        return ""
+
+    evals = metadata.get("evals", {})
+
+    html = """
+    <div>
+        <div style="font-weight: 600;">
+            📏 Metric Details
+        </div>
+    </div>
+    <div style="display: flex; flex-wrap: wrap; gap: 0.5rem;">
+    """
+
+    for eval_name, info in evals.items():
+        score_type = info['score_type'].upper() if info.get('score_type') else "UNKNOWN"
+        direction = "Lower is better" if info.get('lower_is_better') else "Higher is better"
+        direction_icon = "↓" if info.get('lower_is_better') else "↑"
+
+        details_content = ""
+        if info.get('score_type') == "continuous" and info.get('min_score') is not None:
+            details_content += f"<div>Range: [{info['min_score']} - {info['max_score']}]</div>"
+        elif info.get('score_type') == "levels" and info.get('level_names'):
+            levels = ", ".join(str(l) for l in info['level_names'])
+            details_content += f"<div>Levels: {levels}</div>"
+
+        if info.get('has_unknown_level'):
+            details_content += "<div>* -1 indicates Unknown</div>"
+
+        html += f"""
+        <details style="border: 1px solid var(--neutral-200); border-radius: 8px; padding: 0.5rem;">
+            <summary>
+                <span>🏷️ {eval_name}</span>
+                <span>{direction_icon} {direction}</span>
+            </summary>
+            <div>
+                <div>
+                    {info['description']}
+                </div>
+                <div>
+                    {details_content}
+                    <span>{score_type}</span>
+                </div>
+            </div>
+        </details>
+        """
+
+    html += "</div>"
+    return html
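A hypothetical `metadata` payload exercising both formatters; the keys mirror what `get_eval_metadata` builds, while every value below is made up for illustration:

```python
# Hypothetical input for the two formatters above; the keys follow
# get_eval_metadata's output, all values are illustrative.
from ui_components import format_leaderboard_header, format_metric_details

metadata = {
    "source_info": {
        "organization": "Example Org",
        "relationship": "third_party",  # rendered as "Third Party"
        "url": "https://example.com/board",
    },
    "evals": {
        "Accuracy": {
            "score_type": "continuous", "min_score": 0, "max_score": 100,
            "lower_is_better": False, "description": "Percent of correct answers.",
        },
        "Risk Level": {
            "score_type": "levels", "level_names": [0, 1, 2],
            "has_unknown_level": True, "lower_is_better": True,
            "description": "Lower levels indicate safer behavior.",
        },
    },
}

header_html = format_leaderboard_header("ExampleBoard", metadata)   # title, badges, source link
details_html = format_metric_details("ExampleBoard", metadata)      # one <details> card per metric
```

"Accuracy" takes the continuous branch and renders "Range: [0 - 100]", while "Risk Level" takes the levels branch and adds the "* -1 indicates Unknown" footnote.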
" + return html + diff --git a/upload_to_hf.py b/upload_to_hf.py new file mode 100644 index 0000000000000000000000000000000000000000..471d76b5165721cc878dfb7cdc2dbb8549b7b403 --- /dev/null +++ b/upload_to_hf.py @@ -0,0 +1,122 @@ +""" +Upload changed parquet files to HuggingFace dataset. + +This script: +1. Reads the manifest of changed leaderboards +2. Uploads ONLY the changed parquet files +3. Uses HfApi for efficient individual file uploads + +Usage: + # With HF_TOKEN environment variable (GitHub Actions): + python upload_to_hf.py + + # Interactive login (local): + python upload_to_hf.py --login +""" + +from huggingface_hub import login, HfFolder, HfApi +import pandas as pd +from pathlib import Path +import sys +import os +import json + +HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "deepmage121/eee_test") +PARQUET_DIR = Path("parquet_output") +MANIFEST_PATH = PARQUET_DIR / "changed_leaderboards.json" + +def upload_changed_parquets(): + """ + Upload only changed parquet files from manifest. + """ + + hf_token = os.environ.get("HF_TOKEN") + if hf_token: + print("Using HF_TOKEN from environment") + HfFolder.save_token(hf_token) + elif "--login" in sys.argv: + print("Logging in to HuggingFace...") + login() + else: + if not HfFolder.get_token(): + print("ERROR: Not logged in. Run with --login flag or set HF_TOKEN environment variable") + sys.exit(1) + print("Using existing HuggingFace token") + + api = HfApi() + + if not MANIFEST_PATH.exists(): + print(f"ERROR: No manifest found at {MANIFEST_PATH}") + print("Run convert_changed_to_parquet.py first to generate the manifest") + sys.exit(1) + + manifest = json.loads(MANIFEST_PATH.read_text()) + converted_leaderboards = manifest.get("converted", []) + + if not converted_leaderboards: + print("\nNo changed leaderboards to upload (per manifest)") + sys.exit(0) + + print(f"\nManifest found: {len(converted_leaderboards)} leaderboard(s) to upload") + + files_to_upload = [ + PARQUET_DIR / f"{lb}.parquet" + for lb in converted_leaderboards + ] + + files_to_upload = [f for f in files_to_upload if f.exists()] + + if not files_to_upload: + print(f"ERROR: No parquet files to upload in {PARQUET_DIR}") + sys.exit(1) + + print(f"\nUploading {len(files_to_upload)} parquet file(s):") + for pf in files_to_upload: + print(f" - {pf.stem}") + + uploaded_count = 0 + error_count = 0 + + for parquet_file in files_to_upload: + leaderboard_name = parquet_file.stem + + path_in_repo = f"data/{leaderboard_name}/data-00000-of-00001.parquet" + + try: + print(f"\nUploading: {leaderboard_name}") + + df = pd.read_parquet(parquet_file) + print(f" {len(df)} rows, {len(df.columns)} columns") + + api.upload_file( + path_or_fileobj=str(parquet_file), + path_in_repo=path_in_repo, + repo_id=HF_DATASET_REPO, + repo_type="dataset", + commit_message=f"Update {leaderboard_name} leaderboard data" + ) + + print(f" SUCCESS: Uploaded → {path_in_repo}") + uploaded_count += 1 + + except Exception as e: + print(f" ERROR: Error uploading {leaderboard_name}: {e}") + error_count += 1 + + print(f"\n{'='*70}") + print(f"Upload Summary:") + print(f"{'='*70}") + print(f" Successfully uploaded: {uploaded_count} file(s)") + print(f" Errors: {error_count} file(s)") + print(f"{'='*70}") + + if error_count > 0: + print(f"\nWARNING: {error_count} file(s) failed to upload") + sys.exit(1) + + print(f"\nSuccessfully uploaded to HuggingFace!") + print(f"View at: https://huggingface.co/datasets/{HF_DATASET_REPO}") + + +if __name__ == "__main__": + upload_changed_parquets() \ No newline at end of file